author     Philippe Mathieu-Daudé <philmd@linaro.org>  2023-10-04 11:06:28 +0200
committer  Paolo Bonzini <pbonzini@redhat.com>         2023-10-08 21:08:08 +0200
commit     8d7f2e767d8cd058c817dbe31430b89f2e11535d
tree       5b7c25cddf7e6c1b9dfc93c5169541e8c14e2901
parent     01c85e60a4d0675f0ad203fe1fe119381e7ad15b
system: Rename softmmu/ directory as system/

The softmmu/ directory contains files specific to system emulation. Rename
it as system/. Update meson rules, the MAINTAINERS file and all the
documentation and comments.

Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20231004090629.37473-14-philmd@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'system')
-rw-r--r--  system/arch_init.c            50
-rw-r--r--  system/async-teardown.c      143
-rw-r--r--  system/balloon.c             106
-rw-r--r--  system/bootdevice.c          430
-rw-r--r--  system/cpu-throttle.c        128
-rw-r--r--  system/cpu-timers.c          277
-rw-r--r--  system/cpus.c                822
-rw-r--r--  system/datadir.c             110
-rw-r--r--  system/device_tree.c         703
-rw-r--r--  system/dirtylimit.c          678
-rw-r--r--  system/dma-helpers.c         347
-rw-r--r--  system/globals.c              70
-rw-r--r--  system/ioport.c              346
-rw-r--r--  system/main.c                 49
-rw-r--r--  system/memory.c             3683
-rw-r--r--  system/memory_mapping.c      377
-rw-r--r--  system/meson.build            36
-rw-r--r--  system/physmem.c            3796
-rw-r--r--  system/qdev-monitor.c       1148
-rw-r--r--  system/qemu-seccomp.c        486
-rw-r--r--  system/qtest.c              1070
-rw-r--r--  system/rtc.c                 192
-rw-r--r--  system/runstate-action.c      46
-rw-r--r--  system/runstate-hmp-cmds.c    95
-rw-r--r--  system/runstate.c            871
-rw-r--r--  system/tpm-hmp-cmds.c         65
-rw-r--r--  system/tpm.c                 239
-rw-r--r--  system/trace-events           40
-rw-r--r--  system/trace.h                 1
-rw-r--r--  system/vl.c                 3730
-rw-r--r--  system/watchpoint.c          226
31 files changed, 20360 insertions(+), 0 deletions(-)
diff --git a/system/arch_init.c b/system/arch_init.c
new file mode 100644
index 0000000..79716f9
--- /dev/null
+++ b/system/arch_init.c
@@ -0,0 +1,50 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qemu/module.h"
+#include "sysemu/arch_init.h"
+
+#ifdef TARGET_SPARC
+int graphic_width = 1024;
+int graphic_height = 768;
+int graphic_depth = 8;
+#elif defined(TARGET_M68K)
+int graphic_width = 800;
+int graphic_height = 600;
+int graphic_depth = 8;
+#else
+int graphic_width = 800;
+int graphic_height = 600;
+int graphic_depth = 32;
+#endif
+
+const uint32_t arch_type = QEMU_ARCH;
+
+void qemu_init_arch_modules(void)
+{
+#ifdef CONFIG_MODULES
+ module_init_info(qemu_modinfo);
+ module_allow_arch(TARGET_NAME);
+#endif
+}
diff --git a/system/async-teardown.c b/system/async-teardown.c
new file mode 100644
index 0000000..396963c
--- /dev/null
+++ b/system/async-teardown.c
@@ -0,0 +1,143 @@
+/*
+ * Asynchronous teardown
+ *
+ * Copyright IBM, Corp. 2022
+ *
+ * Authors:
+ * Claudio Imbrenda <imbrenda@linux.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at your
+ * option) any later version. See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include <dirent.h>
+#include <sys/prctl.h>
+#include <sched.h>
+
+#include "qemu/async-teardown.h"
+
+#ifdef _SC_THREAD_STACK_MIN
+#define CLONE_STACK_SIZE sysconf(_SC_THREAD_STACK_MIN)
+#else
+#define CLONE_STACK_SIZE 16384
+#endif
+
+static pid_t the_ppid;
+
+/*
+ * Close all open file descriptors.
+ */
+static void close_all_open_fd(void)
+{
+ struct dirent *de;
+ int fd, dfd;
+ DIR *dir;
+
+#ifdef CONFIG_CLOSE_RANGE
+ int r = close_range(0, ~0U, 0);
+ if (!r) {
+ /* Success, no need to try other ways. */
+ return;
+ }
+#endif
+
+ dir = opendir("/proc/self/fd");
+ if (!dir) {
+ /* If /proc is not mounted, there is nothing that can be done. */
+ return;
+ }
+ /* Avoid closing the directory. */
+ dfd = dirfd(dir);
+
+ for (de = readdir(dir); de; de = readdir(dir)) {
+ fd = atoi(de->d_name);
+ if (fd != dfd) {
+ close(fd);
+ }
+ }
+ closedir(dir);
+}
+
+static void hup_handler(int signal)
+{
+ /* Check every second if this process has been reparented. */
+ while (the_ppid == getppid()) {
+ /* sleep() is safe to use in a signal handler. */
+ sleep(1);
+ }
+
+ /* At this point the parent process has terminated completely. */
+ _exit(0);
+}
+
+static int async_teardown_fn(void *arg)
+{
+ struct sigaction sa = { .sa_handler = hup_handler };
+ sigset_t hup_signal;
+ char name[16];
+
+ /* Set a meaningful name for this process. */
+ snprintf(name, 16, "cleanup/%d", the_ppid);
+ prctl(PR_SET_NAME, (unsigned long)name);
+
+    /*
+     * Close all file descriptors that might have been inherited from the
+     * main qemu process when doing clone, needed to make libvirt happy.
+     * close_range() is tried first; /proc is the fallback for older kernels.
+     */
+ close_all_open_fd();
+
+ /* Set up a handler for SIGHUP and unblock SIGHUP. */
+ sigaction(SIGHUP, &sa, NULL);
+ sigemptyset(&hup_signal);
+ sigaddset(&hup_signal, SIGHUP);
+ sigprocmask(SIG_UNBLOCK, &hup_signal, NULL);
+
+ /* Ask to receive SIGHUP when the parent dies. */
+ prctl(PR_SET_PDEATHSIG, SIGHUP);
+
+ /*
+ * Sleep forever, unless the parent process has already terminated. The
+ * only interruption can come from the SIGHUP signal, which in normal
+ * operation is received when the parent process dies.
+ */
+ if (the_ppid == getppid()) {
+ pause();
+ }
+
+ /* At this point the parent process has terminated completely. */
+ _exit(0);
+}
+
+/*
+ * Allocate a new stack of a reasonable size, and return a pointer to its top.
+ */
+static void *new_stack_for_clone(void)
+{
+ size_t stack_size = CLONE_STACK_SIZE;
+ char *stack_ptr;
+
+ /* Allocate a new stack and get a pointer to its top. */
+ stack_ptr = qemu_alloc_stack(&stack_size);
+ stack_ptr += stack_size;
+
+ return stack_ptr;
+}
+
+/*
+ * Block all signals, start (clone) a new process sharing the address space
+ * with qemu (CLONE_VM), then restore signals.
+ */
+void init_async_teardown(void)
+{
+ sigset_t all_signals, old_signals;
+
+ the_ppid = getpid();
+
+ sigfillset(&all_signals);
+ sigprocmask(SIG_BLOCK, &all_signals, &old_signals);
+ clone(async_teardown_fn, new_stack_for_clone(), CLONE_VM, NULL);
+ sigprocmask(SIG_SETMASK, &old_signals, NULL);
+}
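
The helper above leans on the Linux parent-death-signal mechanism. A minimal
standalone sketch of that pattern (hypothetical, Linux-only, and not part of
this patch) looks like this:

    /* watchdog.c: exit together with the parent process. */
    #include <signal.h>
    #include <sys/prctl.h>
    #include <sys/types.h>
    #include <unistd.h>

    int main(void)
    {
        pid_t ppid = getpid();

        if (fork() == 0) {                   /* child acts as the watchdog */
            prctl(PR_SET_PDEATHSIG, SIGHUP); /* ask for SIGHUP when parent dies */
            if (getppid() != ppid) {
                _exit(0);                    /* parent died before prctl took effect */
            }
            pause();                         /* SIGHUP's default action ends us */
            _exit(0);
        }
        sleep(1);                            /* parent exits; kernel signals child */
        return 0;
    }

async-teardown.c refines this in two ways: the helper is created with
clone(CLONE_VM) so it shares rather than duplicates the guest's address
space, and its SIGHUP handler re-checks getppid() in a loop to tolerate a
signal that arrives before the parent is fully gone.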
diff --git a/system/balloon.c b/system/balloon.c
new file mode 100644
index 0000000..e0e8969
--- /dev/null
+++ b/system/balloon.c
@@ -0,0 +1,106 @@
+/*
+ * Generic Balloon handlers and management
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (C) 2011 Red Hat, Inc.
+ * Copyright (C) 2011 Amit Shah <amit.shah@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/atomic.h"
+#include "sysemu/kvm.h"
+#include "sysemu/balloon.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-machine.h"
+#include "qapi/qmp/qerror.h"
+#include "trace.h"
+
+static QEMUBalloonEvent *balloon_event_fn;
+static QEMUBalloonStatus *balloon_stat_fn;
+static void *balloon_opaque;
+
+static bool have_balloon(Error **errp)
+{
+ if (kvm_enabled() && !kvm_has_sync_mmu()) {
+ error_set(errp, ERROR_CLASS_KVM_MISSING_CAP,
+ "Using KVM without synchronous MMU, balloon unavailable");
+ return false;
+ }
+ if (!balloon_event_fn) {
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_ACTIVE,
+ "No balloon device has been activated");
+ return false;
+ }
+ return true;
+}
+
+int qemu_add_balloon_handler(QEMUBalloonEvent *event_func,
+ QEMUBalloonStatus *stat_func, void *opaque)
+{
+ if (balloon_event_fn || balloon_stat_fn || balloon_opaque) {
+        /* We've already registered one balloon handler.  How many can
+         * a guest really have?
+         */
+ return -1;
+ }
+ balloon_event_fn = event_func;
+ balloon_stat_fn = stat_func;
+ balloon_opaque = opaque;
+ return 0;
+}
+
+void qemu_remove_balloon_handler(void *opaque)
+{
+ if (balloon_opaque != opaque) {
+ return;
+ }
+ balloon_event_fn = NULL;
+ balloon_stat_fn = NULL;
+ balloon_opaque = NULL;
+}
+
+BalloonInfo *qmp_query_balloon(Error **errp)
+{
+ BalloonInfo *info;
+
+ if (!have_balloon(errp)) {
+ return NULL;
+ }
+
+ info = g_malloc0(sizeof(*info));
+ balloon_stat_fn(balloon_opaque, info);
+ return info;
+}
+
+void qmp_balloon(int64_t target, Error **errp)
+{
+ if (!have_balloon(errp)) {
+ return;
+ }
+
+ if (target <= 0) {
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "target", "a size");
+ return;
+ }
+
+ trace_balloon_event(balloon_opaque, target);
+ balloon_event_fn(balloon_opaque, target);
+}
diff --git a/system/bootdevice.c b/system/bootdevice.c
new file mode 100644
index 0000000..2106f10
--- /dev/null
+++ b/system/bootdevice.c
@@ -0,0 +1,430 @@
+/*
+ * QEMU Boot Device Implementation
+ *
+ * Copyright (c) 2014 HUAWEI TECHNOLOGIES CO., LTD.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "sysemu/sysemu.h"
+#include "qapi/visitor.h"
+#include "qemu/error-report.h"
+#include "sysemu/reset.h"
+#include "hw/qdev-core.h"
+#include "hw/boards.h"
+
+typedef struct FWBootEntry FWBootEntry;
+
+struct FWBootEntry {
+ QTAILQ_ENTRY(FWBootEntry) link;
+ int32_t bootindex;
+ DeviceState *dev;
+ char *suffix;
+};
+
+static QTAILQ_HEAD(, FWBootEntry) fw_boot_order =
+ QTAILQ_HEAD_INITIALIZER(fw_boot_order);
+static QEMUBootSetHandler *boot_set_handler;
+static void *boot_set_opaque;
+
+void qemu_register_boot_set(QEMUBootSetHandler *func, void *opaque)
+{
+ boot_set_handler = func;
+ boot_set_opaque = opaque;
+}
+
+void qemu_boot_set(const char *boot_order, Error **errp)
+{
+ Error *local_err = NULL;
+
+ if (!boot_set_handler) {
+ error_setg(errp, "no function defined to set boot device list for"
+ " this architecture");
+ return;
+ }
+
+ validate_bootdevices(boot_order, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ boot_set_handler(boot_set_opaque, boot_order, errp);
+}
+
+void validate_bootdevices(const char *devices, Error **errp)
+{
+ /* We just do some generic consistency checks */
+ const char *p;
+ int bitmap = 0;
+
+ for (p = devices; *p != '\0'; p++) {
+ /* Allowed boot devices are:
+ * a-b: floppy disk drives
+ * c-f: IDE disk drives
+ * g-m: machine implementation dependent drives
+ * n-p: network devices
+ * It's up to each machine implementation to check if the given boot
+ * devices match the actual hardware implementation and firmware
+ * features.
+ */
+ if (*p < 'a' || *p > 'p') {
+ error_setg(errp, "Invalid boot device '%c'", *p);
+ return;
+ }
+ if (bitmap & (1 << (*p - 'a'))) {
+ error_setg(errp, "Boot device '%c' was given twice", *p);
+ return;
+ }
+ bitmap |= 1 << (*p - 'a');
+ }
+}
+
+void restore_boot_order(void *opaque)
+{
+ char *normal_boot_order = opaque;
+ static int first = 1;
+
+ /* Restore boot order and remove ourselves after the first boot */
+ if (first) {
+ first = 0;
+ return;
+ }
+
+ if (boot_set_handler) {
+ qemu_boot_set(normal_boot_order, &error_abort);
+ }
+
+ qemu_unregister_reset(restore_boot_order, normal_boot_order);
+ g_free(normal_boot_order);
+}
+
+void check_boot_index(int32_t bootindex, Error **errp)
+{
+ FWBootEntry *i;
+
+ if (bootindex >= 0) {
+ QTAILQ_FOREACH(i, &fw_boot_order, link) {
+ if (i->bootindex == bootindex) {
+ error_setg(errp, "The bootindex %d has already been used",
+ bootindex);
+ return;
+ }
+ }
+ }
+}
+
+void del_boot_device_path(DeviceState *dev, const char *suffix)
+{
+ FWBootEntry *i;
+
+ if (dev == NULL) {
+ return;
+ }
+
+ QTAILQ_FOREACH(i, &fw_boot_order, link) {
+ if ((!suffix || !g_strcmp0(i->suffix, suffix)) &&
+ i->dev == dev) {
+ QTAILQ_REMOVE(&fw_boot_order, i, link);
+ g_free(i->suffix);
+ g_free(i);
+
+ break;
+ }
+ }
+}
+
+void add_boot_device_path(int32_t bootindex, DeviceState *dev,
+ const char *suffix)
+{
+ FWBootEntry *node, *i;
+
+ if (bootindex < 0) {
+ del_boot_device_path(dev, suffix);
+ return;
+ }
+
+ assert(dev != NULL || suffix != NULL);
+
+ del_boot_device_path(dev, suffix);
+
+ node = g_new0(FWBootEntry, 1);
+ node->bootindex = bootindex;
+ node->suffix = g_strdup(suffix);
+ node->dev = dev;
+
+ QTAILQ_FOREACH(i, &fw_boot_order, link) {
+ if (i->bootindex == bootindex) {
+ error_report("Two devices with same boot index %d", bootindex);
+ exit(1);
+ } else if (i->bootindex < bootindex) {
+ continue;
+ }
+ QTAILQ_INSERT_BEFORE(i, node, link);
+ return;
+ }
+ QTAILQ_INSERT_TAIL(&fw_boot_order, node, link);
+}
+
+DeviceState *get_boot_device(uint32_t position)
+{
+ uint32_t counter = 0;
+ FWBootEntry *i = NULL;
+ DeviceState *res = NULL;
+
+ if (!QTAILQ_EMPTY(&fw_boot_order)) {
+ QTAILQ_FOREACH(i, &fw_boot_order, link) {
+ if (counter == position) {
+ res = i->dev;
+ break;
+ }
+ counter++;
+ }
+ }
+ return res;
+}
+
+static char *get_boot_device_path(DeviceState *dev, bool ignore_suffixes,
+ const char *suffix)
+{
+ char *devpath = NULL, *s = NULL, *d, *bootpath;
+
+ if (dev) {
+ devpath = qdev_get_fw_dev_path(dev);
+ assert(devpath);
+ }
+
+ if (!ignore_suffixes) {
+ if (dev) {
+ d = qdev_get_own_fw_dev_path_from_handler(dev->parent_bus, dev);
+ if (d) {
+ assert(!suffix);
+ s = d;
+ } else {
+ s = g_strdup(suffix);
+ }
+ } else {
+ s = g_strdup(suffix);
+ }
+ }
+
+ bootpath = g_strdup_printf("%s%s",
+ devpath ? devpath : "",
+ s ? s : "");
+ g_free(devpath);
+ g_free(s);
+
+ return bootpath;
+}
+
+/*
+ * This function returns a null-terminated string that consists of
+ * newline-separated device paths.
+ *
+ * The memory pointed to by "size" is assigned the total length of the
+ * array in bytes.
+ */
+char *get_boot_devices_list(size_t *size)
+{
+ FWBootEntry *i;
+ size_t total = 0;
+ char *list = NULL;
+ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
+ bool ignore_suffixes = mc->ignore_boot_device_suffixes;
+
+ QTAILQ_FOREACH(i, &fw_boot_order, link) {
+ char *bootpath;
+ size_t len;
+
+ bootpath = get_boot_device_path(i->dev, ignore_suffixes, i->suffix);
+
+ if (total) {
+ list[total-1] = '\n';
+ }
+ len = strlen(bootpath) + 1;
+ list = g_realloc(list, total + len);
+ memcpy(&list[total], bootpath, len);
+ total += len;
+ g_free(bootpath);
+ }
+
+ *size = total;
+
+ if (current_machine->boot_config.has_strict &&
+ current_machine->boot_config.strict && *size > 0) {
+ list[total-1] = '\n';
+ list = g_realloc(list, total + 5);
+ memcpy(&list[total], "HALT", 5);
+ *size = total + 5;
+ }
+ return list;
+}
+
+typedef struct {
+ int32_t *bootindex;
+ const char *suffix;
+ DeviceState *dev;
+} BootIndexProperty;
+
+static void device_get_bootindex(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ BootIndexProperty *prop = opaque;
+ visit_type_int32(v, name, prop->bootindex, errp);
+}
+
+static void device_set_bootindex(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ BootIndexProperty *prop = opaque;
+ int32_t boot_index;
+ Error *local_err = NULL;
+
+ if (!visit_type_int32(v, name, &boot_index, errp)) {
+ return;
+ }
+ /* check whether bootindex is present in fw_boot_order list */
+ check_boot_index(boot_index, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ /* change bootindex to a new one */
+ *prop->bootindex = boot_index;
+
+ add_boot_device_path(*prop->bootindex, prop->dev, prop->suffix);
+}
+
+static void property_release_bootindex(Object *obj, const char *name,
+ void *opaque)
+
+{
+ BootIndexProperty *prop = opaque;
+
+ del_boot_device_path(prop->dev, prop->suffix);
+ g_free(prop);
+}
+
+void device_add_bootindex_property(Object *obj, int32_t *bootindex,
+ const char *name, const char *suffix,
+ DeviceState *dev)
+{
+ BootIndexProperty *prop = g_malloc0(sizeof(*prop));
+
+ prop->bootindex = bootindex;
+ prop->suffix = suffix;
+ prop->dev = dev;
+
+ object_property_add(obj, name, "int32",
+ device_get_bootindex,
+ device_set_bootindex,
+ property_release_bootindex,
+ prop);
+
+ /* initialize devices' bootindex property to -1 */
+ object_property_set_int(obj, name, -1, NULL);
+}
+
+typedef struct FWLCHSEntry FWLCHSEntry;
+
+struct FWLCHSEntry {
+ QTAILQ_ENTRY(FWLCHSEntry) link;
+ DeviceState *dev;
+ char *suffix;
+ uint32_t lcyls;
+ uint32_t lheads;
+ uint32_t lsecs;
+};
+
+static QTAILQ_HEAD(, FWLCHSEntry) fw_lchs =
+ QTAILQ_HEAD_INITIALIZER(fw_lchs);
+
+void add_boot_device_lchs(DeviceState *dev, const char *suffix,
+ uint32_t lcyls, uint32_t lheads, uint32_t lsecs)
+{
+ FWLCHSEntry *node;
+
+ if (!lcyls && !lheads && !lsecs) {
+ return;
+ }
+
+ assert(dev != NULL || suffix != NULL);
+
+ node = g_new0(FWLCHSEntry, 1);
+ node->suffix = g_strdup(suffix);
+ node->dev = dev;
+ node->lcyls = lcyls;
+ node->lheads = lheads;
+ node->lsecs = lsecs;
+
+ QTAILQ_INSERT_TAIL(&fw_lchs, node, link);
+}
+
+void del_boot_device_lchs(DeviceState *dev, const char *suffix)
+{
+ FWLCHSEntry *i;
+
+ if (dev == NULL) {
+ return;
+ }
+
+ QTAILQ_FOREACH(i, &fw_lchs, link) {
+ if ((!suffix || !g_strcmp0(i->suffix, suffix)) &&
+ i->dev == dev) {
+ QTAILQ_REMOVE(&fw_lchs, i, link);
+ g_free(i->suffix);
+ g_free(i);
+
+ break;
+ }
+ }
+}
+
+char *get_boot_devices_lchs_list(size_t *size)
+{
+ FWLCHSEntry *i;
+ size_t total = 0;
+ char *list = NULL;
+
+ QTAILQ_FOREACH(i, &fw_lchs, link) {
+ char *bootpath;
+ char *chs_string;
+ size_t len;
+
+ bootpath = get_boot_device_path(i->dev, false, i->suffix);
+ chs_string = g_strdup_printf("%s %" PRIu32 " %" PRIu32 " %" PRIu32,
+ bootpath, i->lcyls, i->lheads, i->lsecs);
+
+ if (total) {
+ list[total - 1] = '\n';
+ }
+ len = strlen(chs_string) + 1;
+ list = g_realloc(list, total + len);
+ memcpy(&list[total], chs_string, len);
+ total += len;
+ g_free(chs_string);
+ g_free(bootpath);
+ }
+
+ *size = total;
+
+ return list;
+}
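
Both list builders above use the same growth idiom: copy each string
including its NUL terminator, then overwrite the previous terminator with a
newline. A standalone sketch of that idiom, with made-up firmware paths
(illustrative only, not part of this patch):

    #include <glib.h>
    #include <string.h>

    int main(void)
    {
        const char *paths[] = { "/pci@i0cf8/scsi@3/disk@0,0",
                                "/pci@i0cf8/ethernet@4" };
        char *list = NULL;
        size_t total = 0;

        for (size_t i = 0; i < G_N_ELEMENTS(paths); i++) {
            size_t len = strlen(paths[i]) + 1;  /* copy the NUL as well */
            if (total) {
                list[total - 1] = '\n';         /* previous NUL becomes a separator */
            }
            list = g_realloc(list, total + len);
            memcpy(&list[total], paths[i], len);
            total += len;
        }
        g_print("%s\n", list);  /* newline-separated, NUL-terminated list */
        g_free(list);
        return 0;
    }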
diff --git a/system/cpu-throttle.c b/system/cpu-throttle.c
new file mode 100644
index 0000000..d9bb30a
--- /dev/null
+++ b/system/cpu-throttle.c
@@ -0,0 +1,128 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/thread.h"
+#include "hw/core/cpu.h"
+#include "qemu/main-loop.h"
+#include "sysemu/cpus.h"
+#include "sysemu/cpu-throttle.h"
+
+/* vcpu throttling controls */
+static QEMUTimer *throttle_timer;
+static unsigned int throttle_percentage;
+
+#define CPU_THROTTLE_PCT_MIN 1
+#define CPU_THROTTLE_PCT_MAX 99
+#define CPU_THROTTLE_TIMESLICE_NS 10000000
+
+static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
+{
+ double pct;
+ double throttle_ratio;
+ int64_t sleeptime_ns, endtime_ns;
+
+ if (!cpu_throttle_get_percentage()) {
+ return;
+ }
+
+ pct = (double)cpu_throttle_get_percentage() / 100;
+ throttle_ratio = pct / (1 - pct);
+ /* Add 1ns to fix double's rounding error (like 0.9999999...) */
+ sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1);
+ endtime_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + sleeptime_ns;
+ while (sleeptime_ns > 0 && !cpu->stop) {
+ if (sleeptime_ns > SCALE_MS) {
+ qemu_cond_timedwait_iothread(cpu->halt_cond,
+ sleeptime_ns / SCALE_MS);
+ } else {
+ qemu_mutex_unlock_iothread();
+ g_usleep(sleeptime_ns / SCALE_US);
+ qemu_mutex_lock_iothread();
+ }
+ sleeptime_ns = endtime_ns - qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+ }
+ qatomic_set(&cpu->throttle_thread_scheduled, 0);
+}
+
+static void cpu_throttle_timer_tick(void *opaque)
+{
+ CPUState *cpu;
+ double pct;
+
+ /* Stop the timer if needed */
+ if (!cpu_throttle_get_percentage()) {
+ return;
+ }
+ CPU_FOREACH(cpu) {
+ if (!qatomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
+ async_run_on_cpu(cpu, cpu_throttle_thread,
+ RUN_ON_CPU_NULL);
+ }
+ }
+
+ pct = (double)cpu_throttle_get_percentage() / 100;
+ timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
+ CPU_THROTTLE_TIMESLICE_NS / (1 - pct));
+}
+
+void cpu_throttle_set(int new_throttle_pct)
+{
+ /*
+ * boolean to store whether throttle is already active or not,
+ * before modifying throttle_percentage
+ */
+ bool throttle_active = cpu_throttle_active();
+
+ /* Ensure throttle percentage is within valid range */
+ new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
+ new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
+
+ qatomic_set(&throttle_percentage, new_throttle_pct);
+
+ if (!throttle_active) {
+ cpu_throttle_timer_tick(NULL);
+ }
+}
+
+void cpu_throttle_stop(void)
+{
+ qatomic_set(&throttle_percentage, 0);
+}
+
+bool cpu_throttle_active(void)
+{
+ return (cpu_throttle_get_percentage() != 0);
+}
+
+int cpu_throttle_get_percentage(void)
+{
+ return qatomic_read(&throttle_percentage);
+}
+
+void cpu_throttle_init(void)
+{
+ throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
+ cpu_throttle_timer_tick, NULL);
+}
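
The sleep time above follows from requiring the vCPU to run only a fraction
(1 - pct) of wall-clock time: for each CPU_THROTTLE_TIMESLICE_NS of run time
it must sleep pct / (1 - pct) times as long. A standalone sketch of the
arithmetic (illustrative only):

    #include <stdint.h>
    #include <stdio.h>

    #define CPU_THROTTLE_TIMESLICE_NS 10000000  /* 10 ms, as in cpu-throttle.c */

    int main(void)
    {
        int pcts[] = { 10, 50, 90, 99 };

        for (int i = 0; i < 4; i++) {
            double pct = pcts[i] / 100.0;
            double throttle_ratio = pct / (1 - pct);
            /* Same +1 ns rounding fixup as cpu_throttle_thread(). */
            int64_t sleeptime_ns =
                (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1);
            printf("%2d%% -> sleep %10lld ns per 10 ms slice\n",
                   pcts[i], (long long)sleeptime_ns);
        }
        return 0;
    }

At 50% the vCPU sleeps roughly as long as it runs; at 99% it sleeps about
990 ms for every 10 ms of run time, which is why the percentage is clamped
to CPU_THROTTLE_PCT_MAX.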
diff --git a/system/cpu-timers.c b/system/cpu-timers.c
new file mode 100644
index 0000000..7452d97
--- /dev/null
+++ b/system/cpu-timers.c
@@ -0,0 +1,277 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "migration/vmstate.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "sysemu/cpus.h"
+#include "qemu/main-loop.h"
+#include "qemu/option.h"
+#include "qemu/seqlock.h"
+#include "sysemu/replay.h"
+#include "sysemu/runstate.h"
+#include "hw/core/cpu.h"
+#include "sysemu/cpu-timers.h"
+#include "sysemu/cpu-throttle.h"
+#include "sysemu/cpu-timers-internal.h"
+
+/* clock and ticks */
+
+static int64_t cpu_get_ticks_locked(void)
+{
+ int64_t ticks = timers_state.cpu_ticks_offset;
+ if (timers_state.cpu_ticks_enabled) {
+ ticks += cpu_get_host_ticks();
+ }
+
+ if (timers_state.cpu_ticks_prev > ticks) {
+        /* Non-increasing ticks may happen if the host uses software suspend. */
+ timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
+ ticks = timers_state.cpu_ticks_prev;
+ }
+
+ timers_state.cpu_ticks_prev = ticks;
+ return ticks;
+}
+
+/*
+ * return the time elapsed in VM between vm_start and vm_stop.
+ * cpu_get_ticks() uses units of the host CPU cycle counter.
+ */
+int64_t cpu_get_ticks(void)
+{
+ int64_t ticks;
+
+ qemu_spin_lock(&timers_state.vm_clock_lock);
+ ticks = cpu_get_ticks_locked();
+ qemu_spin_unlock(&timers_state.vm_clock_lock);
+ return ticks;
+}
+
+int64_t cpu_get_clock_locked(void)
+{
+ int64_t time;
+
+ time = timers_state.cpu_clock_offset;
+ if (timers_state.cpu_ticks_enabled) {
+ time += get_clock();
+ }
+
+ return time;
+}
+
+/*
+ * Return the monotonic time elapsed in VM, i.e.,
+ * the time between vm_start and vm_stop
+ */
+int64_t cpu_get_clock(void)
+{
+ int64_t ti;
+ unsigned start;
+
+ do {
+ start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
+ ti = cpu_get_clock_locked();
+ } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
+
+ return ti;
+}
+
+/*
+ * enable cpu_get_ticks()
+ * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
+ */
+void cpu_enable_ticks(void)
+{
+ seqlock_write_lock(&timers_state.vm_clock_seqlock,
+ &timers_state.vm_clock_lock);
+ if (!timers_state.cpu_ticks_enabled) {
+ timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
+ timers_state.cpu_clock_offset -= get_clock();
+ timers_state.cpu_ticks_enabled = 1;
+ }
+ seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+ &timers_state.vm_clock_lock);
+}
+
+/*
+ * disable cpu_get_ticks(): the clock is stopped. You must not call
+ * cpu_get_ticks() after that.
+ * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
+ */
+void cpu_disable_ticks(void)
+{
+ seqlock_write_lock(&timers_state.vm_clock_seqlock,
+ &timers_state.vm_clock_lock);
+ if (timers_state.cpu_ticks_enabled) {
+ timers_state.cpu_ticks_offset += cpu_get_host_ticks();
+ timers_state.cpu_clock_offset = cpu_get_clock_locked();
+ timers_state.cpu_ticks_enabled = 0;
+ }
+ seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+ &timers_state.vm_clock_lock);
+}
+
+static bool icount_state_needed(void *opaque)
+{
+ return icount_enabled();
+}
+
+static bool warp_timer_state_needed(void *opaque)
+{
+ TimersState *s = opaque;
+ return s->icount_warp_timer != NULL;
+}
+
+static bool adjust_timers_state_needed(void *opaque)
+{
+ TimersState *s = opaque;
+ return s->icount_rt_timer != NULL;
+}
+
+static bool icount_shift_state_needed(void *opaque)
+{
+ return icount_enabled() == 2;
+}
+
+/*
+ * The warp timer migration subsection is optional: the timer may not exist.
+ */
+static const VMStateDescription icount_vmstate_warp_timer = {
+ .name = "timer/icount/warp_timer",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = warp_timer_state_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_INT64(vm_clock_warp_start, TimersState),
+ VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static const VMStateDescription icount_vmstate_adjust_timers = {
+ .name = "timer/icount/timers",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = adjust_timers_state_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
+ VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static const VMStateDescription icount_vmstate_shift = {
+ .name = "timer/icount/shift",
+ .version_id = 2,
+ .minimum_version_id = 2,
+ .needed = icount_shift_state_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_INT16(icount_time_shift, TimersState),
+ VMSTATE_INT64(last_delta, TimersState),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+/*
+ * This is a subsection for icount migration.
+ */
+static const VMStateDescription icount_vmstate_timers = {
+ .name = "timer/icount",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = icount_state_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_INT64(qemu_icount_bias, TimersState),
+ VMSTATE_INT64(qemu_icount, TimersState),
+ VMSTATE_END_OF_LIST()
+ },
+ .subsections = (const VMStateDescription * []) {
+ &icount_vmstate_warp_timer,
+ &icount_vmstate_adjust_timers,
+ &icount_vmstate_shift,
+ NULL
+ }
+};
+
+static const VMStateDescription vmstate_timers = {
+ .name = "timer",
+ .version_id = 2,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_INT64(cpu_ticks_offset, TimersState),
+ VMSTATE_UNUSED(8),
+ VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
+ VMSTATE_END_OF_LIST()
+ },
+ .subsections = (const VMStateDescription * []) {
+ &icount_vmstate_timers,
+ NULL
+ }
+};
+
+static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
+{
+}
+
+void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
+{
+ if (!icount_enabled() || type != QEMU_CLOCK_VIRTUAL) {
+ qemu_notify_event();
+ return;
+ }
+
+ if (qemu_in_vcpu_thread()) {
+ /*
+ * A CPU is currently running; kick it back out to the
+ * tcg_cpu_exec() loop so it will recalculate its
+ * icount deadline immediately.
+ */
+ qemu_cpu_kick(current_cpu);
+ } else if (first_cpu) {
+ /*
+ * qemu_cpu_kick is not enough to kick a halted CPU out of
+ * qemu_tcg_wait_io_event. async_run_on_cpu, instead,
+ * causes cpu_thread_is_idle to return false. This way,
+ * handle_icount_deadline can run.
+ * If we have no CPUs at all for some reason, we don't
+ * need to do anything.
+ */
+ async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
+ }
+}
+
+TimersState timers_state;
+
+/* initialize timers state and the cpu throttle for convenience */
+void cpu_timers_init(void)
+{
+ seqlock_init(&timers_state.vm_clock_seqlock);
+ qemu_spin_init(&timers_state.vm_clock_lock);
+ vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
+
+ cpu_throttle_init();
+}
diff --git a/system/cpus.c b/system/cpus.c
new file mode 100644
index 0000000..0848e0d
--- /dev/null
+++ b/system/cpus.c
@@ -0,0 +1,822 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "monitor/monitor.h"
+#include "qemu/coroutine-tls.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-machine.h"
+#include "qapi/qapi-commands-misc.h"
+#include "qapi/qapi-events-run-state.h"
+#include "qapi/qmp/qerror.h"
+#include "exec/gdbstub.h"
+#include "sysemu/hw_accel.h"
+#include "exec/cpu-common.h"
+#include "qemu/thread.h"
+#include "qemu/main-loop.h"
+#include "qemu/plugin.h"
+#include "sysemu/cpus.h"
+#include "qemu/guest-random.h"
+#include "hw/nmi.h"
+#include "sysemu/replay.h"
+#include "sysemu/runstate.h"
+#include "sysemu/cpu-timers.h"
+#include "sysemu/whpx.h"
+#include "hw/boards.h"
+#include "hw/hw.h"
+#include "trace.h"
+
+#ifdef CONFIG_LINUX
+
+#include <sys/prctl.h>
+
+#ifndef PR_MCE_KILL
+#define PR_MCE_KILL 33
+#endif
+
+#ifndef PR_MCE_KILL_SET
+#define PR_MCE_KILL_SET 1
+#endif
+
+#ifndef PR_MCE_KILL_EARLY
+#define PR_MCE_KILL_EARLY 1
+#endif
+
+#endif /* CONFIG_LINUX */
+
+static QemuMutex qemu_global_mutex;
+
+/*
+ * The chosen accelerator is supposed to register this.
+ */
+static const AccelOpsClass *cpus_accel;
+
+bool cpu_is_stopped(CPUState *cpu)
+{
+ return cpu->stopped || !runstate_is_running();
+}
+
+bool cpu_work_list_empty(CPUState *cpu)
+{
+ return QSIMPLEQ_EMPTY_ATOMIC(&cpu->work_list);
+}
+
+bool cpu_thread_is_idle(CPUState *cpu)
+{
+ if (cpu->stop || !cpu_work_list_empty(cpu)) {
+ return false;
+ }
+ if (cpu_is_stopped(cpu)) {
+ return true;
+ }
+ if (!cpu->halted || cpu_has_work(cpu)) {
+ return false;
+ }
+ if (cpus_accel->cpu_thread_is_idle) {
+ return cpus_accel->cpu_thread_is_idle(cpu);
+ }
+ return true;
+}
+
+bool all_cpu_threads_idle(void)
+{
+ CPUState *cpu;
+
+ CPU_FOREACH(cpu) {
+ if (!cpu_thread_is_idle(cpu)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+/***********************************************************/
+void hw_error(const char *fmt, ...)
+{
+ va_list ap;
+ CPUState *cpu;
+
+ va_start(ap, fmt);
+ fprintf(stderr, "qemu: hardware error: ");
+ vfprintf(stderr, fmt, ap);
+ fprintf(stderr, "\n");
+ CPU_FOREACH(cpu) {
+ fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
+ cpu_dump_state(cpu, stderr, CPU_DUMP_FPU);
+ }
+ va_end(ap);
+ abort();
+}
+
+void cpu_synchronize_all_states(void)
+{
+ CPUState *cpu;
+
+ CPU_FOREACH(cpu) {
+ cpu_synchronize_state(cpu);
+ }
+}
+
+void cpu_synchronize_all_post_reset(void)
+{
+ CPUState *cpu;
+
+ CPU_FOREACH(cpu) {
+ cpu_synchronize_post_reset(cpu);
+ }
+}
+
+void cpu_synchronize_all_post_init(void)
+{
+ CPUState *cpu;
+
+ CPU_FOREACH(cpu) {
+ cpu_synchronize_post_init(cpu);
+ }
+}
+
+void cpu_synchronize_all_pre_loadvm(void)
+{
+ CPUState *cpu;
+
+ CPU_FOREACH(cpu) {
+ cpu_synchronize_pre_loadvm(cpu);
+ }
+}
+
+void cpu_synchronize_state(CPUState *cpu)
+{
+ if (cpus_accel->synchronize_state) {
+ cpus_accel->synchronize_state(cpu);
+ }
+}
+
+void cpu_synchronize_post_reset(CPUState *cpu)
+{
+ if (cpus_accel->synchronize_post_reset) {
+ cpus_accel->synchronize_post_reset(cpu);
+ }
+}
+
+void cpu_synchronize_post_init(CPUState *cpu)
+{
+ if (cpus_accel->synchronize_post_init) {
+ cpus_accel->synchronize_post_init(cpu);
+ }
+}
+
+void cpu_synchronize_pre_loadvm(CPUState *cpu)
+{
+ if (cpus_accel->synchronize_pre_loadvm) {
+ cpus_accel->synchronize_pre_loadvm(cpu);
+ }
+}
+
+bool cpus_are_resettable(void)
+{
+ if (cpus_accel->cpus_are_resettable) {
+ return cpus_accel->cpus_are_resettable();
+ }
+ return true;
+}
+
+int64_t cpus_get_virtual_clock(void)
+{
+ /*
+ * XXX
+ *
+ * need to check that cpus_accel is not NULL, because qcow2 calls
+ * qemu_get_clock_ns(CLOCK_VIRTUAL) without any accel initialized and
+ * with ticks disabled in some io-tests:
+ * 030 040 041 060 099 120 127 140 156 161 172 181 191 192 195 203 229 249 256 267
+ *
+ * is this expected?
+ *
+ * XXX
+ */
+ if (cpus_accel && cpus_accel->get_virtual_clock) {
+ return cpus_accel->get_virtual_clock();
+ }
+ return cpu_get_clock();
+}
+
+/*
+ * return the time elapsed in VM between vm_start and vm_stop. Unless
+ * icount is active, cpus_get_elapsed_ticks() uses units of the host CPU cycle
+ * counter.
+ */
+int64_t cpus_get_elapsed_ticks(void)
+{
+ if (cpus_accel->get_elapsed_ticks) {
+ return cpus_accel->get_elapsed_ticks();
+ }
+ return cpu_get_ticks();
+}
+
+static void generic_handle_interrupt(CPUState *cpu, int mask)
+{
+ cpu->interrupt_request |= mask;
+
+ if (!qemu_cpu_is_self(cpu)) {
+ qemu_cpu_kick(cpu);
+ }
+}
+
+void cpu_interrupt(CPUState *cpu, int mask)
+{
+ if (cpus_accel->handle_interrupt) {
+ cpus_accel->handle_interrupt(cpu, mask);
+ } else {
+ generic_handle_interrupt(cpu, mask);
+ }
+}
+
+static int do_vm_stop(RunState state, bool send_stop)
+{
+ int ret = 0;
+
+ if (runstate_is_running()) {
+ runstate_set(state);
+ cpu_disable_ticks();
+ pause_all_vcpus();
+ vm_state_notify(0, state);
+ if (send_stop) {
+ qapi_event_send_stop();
+ }
+ }
+
+ bdrv_drain_all();
+ ret = bdrv_flush_all();
+ trace_vm_stop_flush_all(ret);
+
+ return ret;
+}
+
+/* Special vm_stop() variant for terminating the process. Historically clients
+ * did not expect a QMP STOP event and so we need to retain compatibility.
+ */
+int vm_shutdown(void)
+{
+ return do_vm_stop(RUN_STATE_SHUTDOWN, false);
+}
+
+bool cpu_can_run(CPUState *cpu)
+{
+ if (cpu->stop) {
+ return false;
+ }
+ if (cpu_is_stopped(cpu)) {
+ return false;
+ }
+ return true;
+}
+
+void cpu_handle_guest_debug(CPUState *cpu)
+{
+ if (replay_running_debug()) {
+ if (!cpu->singlestep_enabled) {
+ /*
+ * Report about the breakpoint and
+ * make a single step to skip it
+ */
+ replay_breakpoint();
+ cpu_single_step(cpu, SSTEP_ENABLE);
+ } else {
+ cpu_single_step(cpu, 0);
+ }
+ } else {
+ gdb_set_stop_cpu(cpu);
+ qemu_system_debug_request();
+ cpu->stopped = true;
+ }
+}
+
+#ifdef CONFIG_LINUX
+static void sigbus_reraise(void)
+{
+ sigset_t set;
+ struct sigaction action;
+
+ memset(&action, 0, sizeof(action));
+ action.sa_handler = SIG_DFL;
+ if (!sigaction(SIGBUS, &action, NULL)) {
+ raise(SIGBUS);
+ sigemptyset(&set);
+ sigaddset(&set, SIGBUS);
+ pthread_sigmask(SIG_UNBLOCK, &set, NULL);
+ }
+ perror("Failed to re-raise SIGBUS!");
+ abort();
+}
+
+static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
+{
+ if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
+ sigbus_reraise();
+ }
+
+ if (current_cpu) {
+ /* Called asynchronously in VCPU thread. */
+ if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
+ sigbus_reraise();
+ }
+ } else {
+ /* Called synchronously (via signalfd) in main thread. */
+ if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
+ sigbus_reraise();
+ }
+ }
+}
+
+static void qemu_init_sigbus(void)
+{
+ struct sigaction action;
+
+ /*
+ * ALERT: when modifying this, take care that SIGBUS forwarding in
+ * qemu_prealloc_mem() will continue working as expected.
+ */
+ memset(&action, 0, sizeof(action));
+ action.sa_flags = SA_SIGINFO;
+ action.sa_sigaction = sigbus_handler;
+ sigaction(SIGBUS, &action, NULL);
+
+ prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
+}
+#else /* !CONFIG_LINUX */
+static void qemu_init_sigbus(void)
+{
+}
+#endif /* !CONFIG_LINUX */
+
+static QemuThread io_thread;
+
+/* cpu creation */
+static QemuCond qemu_cpu_cond;
+/* system init */
+static QemuCond qemu_pause_cond;
+
+void qemu_init_cpu_loop(void)
+{
+ qemu_init_sigbus();
+ qemu_cond_init(&qemu_cpu_cond);
+ qemu_cond_init(&qemu_pause_cond);
+ qemu_mutex_init(&qemu_global_mutex);
+
+ qemu_thread_get_self(&io_thread);
+}
+
+void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
+{
+ do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
+}
+
+static void qemu_cpu_stop(CPUState *cpu, bool exit)
+{
+ g_assert(qemu_cpu_is_self(cpu));
+ cpu->stop = false;
+ cpu->stopped = true;
+ if (exit) {
+ cpu_exit(cpu);
+ }
+ qemu_cond_broadcast(&qemu_pause_cond);
+}
+
+void qemu_wait_io_event_common(CPUState *cpu)
+{
+ qatomic_set_mb(&cpu->thread_kicked, false);
+ if (cpu->stop) {
+ qemu_cpu_stop(cpu, false);
+ }
+ process_queued_cpu_work(cpu);
+}
+
+void qemu_wait_io_event(CPUState *cpu)
+{
+ bool slept = false;
+
+ while (cpu_thread_is_idle(cpu)) {
+ if (!slept) {
+ slept = true;
+ qemu_plugin_vcpu_idle_cb(cpu);
+ }
+ qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
+ }
+ if (slept) {
+ qemu_plugin_vcpu_resume_cb(cpu);
+ }
+
+ qemu_wait_io_event_common(cpu);
+}
+
+void cpus_kick_thread(CPUState *cpu)
+{
+ if (cpu->thread_kicked) {
+ return;
+ }
+ cpu->thread_kicked = true;
+
+#ifndef _WIN32
+ int err = pthread_kill(cpu->thread->thread, SIG_IPI);
+ if (err && err != ESRCH) {
+ fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
+ exit(1);
+ }
+#else
+ qemu_sem_post(&cpu->sem);
+#endif
+}
+
+void qemu_cpu_kick(CPUState *cpu)
+{
+ qemu_cond_broadcast(cpu->halt_cond);
+ if (cpus_accel->kick_vcpu_thread) {
+ cpus_accel->kick_vcpu_thread(cpu);
+ } else { /* default */
+ cpus_kick_thread(cpu);
+ }
+}
+
+void qemu_cpu_kick_self(void)
+{
+ assert(current_cpu);
+ cpus_kick_thread(current_cpu);
+}
+
+bool qemu_cpu_is_self(CPUState *cpu)
+{
+ return qemu_thread_is_self(cpu->thread);
+}
+
+bool qemu_in_vcpu_thread(void)
+{
+ return current_cpu && qemu_cpu_is_self(current_cpu);
+}
+
+QEMU_DEFINE_STATIC_CO_TLS(bool, iothread_locked)
+
+bool qemu_mutex_iothread_locked(void)
+{
+ return get_iothread_locked();
+}
+
+bool qemu_in_main_thread(void)
+{
+ return qemu_mutex_iothread_locked();
+}
+
+/*
+ * The BQL is taken from so many places that it is worth profiling the
+ * callers directly, instead of funneling them all through a single function.
+ */
+void qemu_mutex_lock_iothread_impl(const char *file, int line)
+{
+ QemuMutexLockFunc bql_lock = qatomic_read(&qemu_bql_mutex_lock_func);
+
+ g_assert(!qemu_mutex_iothread_locked());
+ bql_lock(&qemu_global_mutex, file, line);
+ set_iothread_locked(true);
+}
+
+void qemu_mutex_unlock_iothread(void)
+{
+ g_assert(qemu_mutex_iothread_locked());
+ set_iothread_locked(false);
+ qemu_mutex_unlock(&qemu_global_mutex);
+}
+
+void qemu_cond_wait_iothread(QemuCond *cond)
+{
+ qemu_cond_wait(cond, &qemu_global_mutex);
+}
+
+void qemu_cond_timedwait_iothread(QemuCond *cond, int ms)
+{
+ qemu_cond_timedwait(cond, &qemu_global_mutex, ms);
+}
+
+/* signal CPU creation */
+void cpu_thread_signal_created(CPUState *cpu)
+{
+ cpu->created = true;
+ qemu_cond_signal(&qemu_cpu_cond);
+}
+
+/* signal CPU destruction */
+void cpu_thread_signal_destroyed(CPUState *cpu)
+{
+ cpu->created = false;
+ qemu_cond_signal(&qemu_cpu_cond);
+}
+
+
+static bool all_vcpus_paused(void)
+{
+ CPUState *cpu;
+
+ CPU_FOREACH(cpu) {
+ if (!cpu->stopped) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+void pause_all_vcpus(void)
+{
+ CPUState *cpu;
+
+ qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
+ CPU_FOREACH(cpu) {
+ if (qemu_cpu_is_self(cpu)) {
+ qemu_cpu_stop(cpu, true);
+ } else {
+ cpu->stop = true;
+ qemu_cpu_kick(cpu);
+ }
+ }
+
+ /* We need to drop the replay_lock so any vCPU threads woken up
+ * can finish their replay tasks
+ */
+ replay_mutex_unlock();
+
+ while (!all_vcpus_paused()) {
+ qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
+ CPU_FOREACH(cpu) {
+ qemu_cpu_kick(cpu);
+ }
+ }
+
+ qemu_mutex_unlock_iothread();
+ replay_mutex_lock();
+ qemu_mutex_lock_iothread();
+}
+
+void cpu_resume(CPUState *cpu)
+{
+ cpu->stop = false;
+ cpu->stopped = false;
+ qemu_cpu_kick(cpu);
+}
+
+void resume_all_vcpus(void)
+{
+ CPUState *cpu;
+
+ if (!runstate_is_running()) {
+ return;
+ }
+
+ qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
+ CPU_FOREACH(cpu) {
+ cpu_resume(cpu);
+ }
+}
+
+void cpu_remove_sync(CPUState *cpu)
+{
+ cpu->stop = true;
+ cpu->unplug = true;
+ qemu_cpu_kick(cpu);
+ qemu_mutex_unlock_iothread();
+ qemu_thread_join(cpu->thread);
+ qemu_mutex_lock_iothread();
+}
+
+void cpus_register_accel(const AccelOpsClass *ops)
+{
+ assert(ops != NULL);
+ assert(ops->create_vcpu_thread != NULL); /* mandatory */
+ cpus_accel = ops;
+}
+
+const AccelOpsClass *cpus_get_accel(void)
+{
+ /* broken if we call this early */
+ assert(cpus_accel);
+ return cpus_accel;
+}
+
+void qemu_init_vcpu(CPUState *cpu)
+{
+ MachineState *ms = MACHINE(qdev_get_machine());
+
+ cpu->nr_cores = ms->smp.cores;
+ cpu->nr_threads = ms->smp.threads;
+ cpu->stopped = true;
+ cpu->random_seed = qemu_guest_random_seed_thread_part1();
+
+ if (!cpu->as) {
+ /* If the target cpu hasn't set up any address spaces itself,
+ * give it the default one.
+ */
+ cpu->num_ases = 1;
+ cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
+ }
+
+ /* accelerators all implement the AccelOpsClass */
+ g_assert(cpus_accel != NULL && cpus_accel->create_vcpu_thread != NULL);
+ cpus_accel->create_vcpu_thread(cpu);
+
+ while (!cpu->created) {
+ qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
+ }
+}
+
+void cpu_stop_current(void)
+{
+ if (current_cpu) {
+ current_cpu->stop = true;
+ cpu_exit(current_cpu);
+ }
+}
+
+int vm_stop(RunState state)
+{
+ if (qemu_in_vcpu_thread()) {
+ qemu_system_vmstop_request_prepare();
+ qemu_system_vmstop_request(state);
+ /*
+ * FIXME: should not return to device code in case
+ * vm_stop() has been requested.
+ */
+ cpu_stop_current();
+ return 0;
+ }
+
+ return do_vm_stop(state, true);
+}
+
+/**
+ * Prepare for (re)starting the VM.
+ * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
+ * running or in case of an error condition), 0 otherwise.
+ */
+int vm_prepare_start(bool step_pending)
+{
+ RunState requested;
+
+ qemu_vmstop_requested(&requested);
+ if (runstate_is_running() && requested == RUN_STATE__MAX) {
+ return -1;
+ }
+
+ /* Ensure that a STOP/RESUME pair of events is emitted if a
+ * vmstop request was pending. The BLOCK_IO_ERROR event, for
+ * example, according to documentation is always followed by
+ * the STOP event.
+ */
+ if (runstate_is_running()) {
+ qapi_event_send_stop();
+ qapi_event_send_resume();
+ return -1;
+ }
+
+ /*
+ * WHPX accelerator needs to know whether we are going to step
+ * any CPUs, before starting the first one.
+ */
+ if (cpus_accel->synchronize_pre_resume) {
+ cpus_accel->synchronize_pre_resume(step_pending);
+ }
+
+    /* We are sending this now, but the CPUs will be resumed shortly afterwards */
+ qapi_event_send_resume();
+
+ cpu_enable_ticks();
+ runstate_set(RUN_STATE_RUNNING);
+ vm_state_notify(1, RUN_STATE_RUNNING);
+ return 0;
+}
+
+void vm_start(void)
+{
+ if (!vm_prepare_start(false)) {
+ resume_all_vcpus();
+ }
+}
+
+/* Does a state transition even if the VM is already stopped;
+   the current state is forgotten forever. */
+int vm_stop_force_state(RunState state)
+{
+ if (runstate_is_running()) {
+ return vm_stop(state);
+ } else {
+ int ret;
+ runstate_set(state);
+
+ bdrv_drain_all();
+ /* Make sure to return an error if the flush in a previous vm_stop()
+ * failed. */
+ ret = bdrv_flush_all();
+ trace_vm_stop_flush_all(ret);
+ return ret;
+ }
+}
+
+void qmp_memsave(int64_t addr, int64_t size, const char *filename,
+ bool has_cpu, int64_t cpu_index, Error **errp)
+{
+ FILE *f;
+ uint32_t l;
+ CPUState *cpu;
+ uint8_t buf[1024];
+ int64_t orig_addr = addr, orig_size = size;
+
+ if (!has_cpu) {
+ cpu_index = 0;
+ }
+
+ cpu = qemu_get_cpu(cpu_index);
+ if (cpu == NULL) {
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
+ "a CPU number");
+ return;
+ }
+
+ f = fopen(filename, "wb");
+ if (!f) {
+ error_setg_file_open(errp, errno, filename);
+ return;
+ }
+
+ while (size != 0) {
+ l = sizeof(buf);
+ if (l > size)
+ l = size;
+ if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
+ error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
+ " specified", orig_addr, orig_size);
+ goto exit;
+ }
+ if (fwrite(buf, 1, l, f) != l) {
+ error_setg(errp, QERR_IO_ERROR);
+ goto exit;
+ }
+ addr += l;
+ size -= l;
+ }
+
+exit:
+ fclose(f);
+}
+
+void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
+ Error **errp)
+{
+ FILE *f;
+ uint32_t l;
+ uint8_t buf[1024];
+
+ f = fopen(filename, "wb");
+ if (!f) {
+ error_setg_file_open(errp, errno, filename);
+ return;
+ }
+
+ while (size != 0) {
+ l = sizeof(buf);
+ if (l > size)
+ l = size;
+ cpu_physical_memory_read(addr, buf, l);
+ if (fwrite(buf, 1, l, f) != l) {
+ error_setg(errp, QERR_IO_ERROR);
+ goto exit;
+ }
+ addr += l;
+ size -= l;
+ }
+
+exit:
+ fclose(f);
+}
+
+void qmp_inject_nmi(Error **errp)
+{
+ nmi_monitor_handle(monitor_get_cpu_index(monitor_cur()), errp);
+}
+
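
qmp_memsave() and qmp_pmemsave() both stream memory to a file through a
fixed 1 KiB bounce buffer. A standalone sketch of the chunking loop, dumping
a host buffer instead of guest memory (illustrative only):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Write `size` bytes from `mem` to `filename` in 1 KiB chunks. */
    static int dump_mem(const void *mem, size_t size, const char *filename)
    {
        uint8_t buf[1024];
        const uint8_t *p = mem;
        FILE *f = fopen(filename, "wb");

        if (!f) {
            return -1;
        }
        while (size != 0) {
            size_t l = sizeof(buf);
            if (l > size) {
                l = size;
            }
            memcpy(buf, p, l);  /* stands in for cpu_memory_rw_debug() */
            if (fwrite(buf, 1, l, f) != l) {
                fclose(f);
                return -1;
            }
            p += l;
            size -= l;
        }
        return fclose(f);
    }

    int main(void)
    {
        char data[4000];

        memset(data, 0xab, sizeof(data));
        return dump_mem(data, sizeof(data), "/tmp/mem.bin") ? 1 : 0;
    }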
diff --git a/system/datadir.c b/system/datadir.c
new file mode 100644
index 0000000..c9237cb
--- /dev/null
+++ b/system/datadir.c
@@ -0,0 +1,110 @@
+/*
+ * QEMU firmware and keymap file search
+ *
+ * Copyright (c) 2003-2020 QEMU contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/datadir.h"
+#include "qemu/cutils.h"
+#include "trace.h"
+
+static const char *data_dir[16];
+static int data_dir_idx;
+
+char *qemu_find_file(int type, const char *name)
+{
+ int i;
+ const char *subdir;
+ char *buf;
+
+ /* Try the name as a straight path first */
+ if (access(name, R_OK) == 0) {
+ trace_load_file(name, name);
+ return g_strdup(name);
+ }
+
+ switch (type) {
+ case QEMU_FILE_TYPE_BIOS:
+ subdir = "";
+ break;
+ case QEMU_FILE_TYPE_KEYMAP:
+ subdir = "keymaps/";
+ break;
+ default:
+ abort();
+ }
+
+ for (i = 0; i < data_dir_idx; i++) {
+ buf = g_strdup_printf("%s/%s%s", data_dir[i], subdir, name);
+ if (access(buf, R_OK) == 0) {
+ trace_load_file(name, buf);
+ return buf;
+ }
+ g_free(buf);
+ }
+ return NULL;
+}
+
+void qemu_add_data_dir(char *path)
+{
+ int i;
+
+ if (path == NULL) {
+ return;
+ }
+ if (data_dir_idx == ARRAY_SIZE(data_dir)) {
+ return;
+ }
+ for (i = 0; i < data_dir_idx; i++) {
+ if (strcmp(data_dir[i], path) == 0) {
+ g_free(path); /* duplicate */
+ return;
+ }
+ }
+ data_dir[data_dir_idx++] = path;
+}
+
+void qemu_add_default_firmwarepath(void)
+{
+ static const char * const dirs[] = {
+ CONFIG_QEMU_FIRMWAREPATH
+ NULL
+ };
+
+ size_t i;
+
+ /* add configured firmware directories */
+ for (i = 0; dirs[i] != NULL; i++) {
+ qemu_add_data_dir(get_relocated_path(dirs[i]));
+ }
+
+ /* try to find datadir relative to the executable path */
+ qemu_add_data_dir(get_relocated_path(CONFIG_QEMU_DATADIR));
+}
+
+void qemu_list_data_dirs(void)
+{
+ int i;
+ for (i = 0; i < data_dir_idx; i++) {
+ printf("%s\n", data_dir[i]);
+ }
+}
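
qemu_find_file() implements a first-match search: a readable literal path
wins outright, otherwise each registered data directory is probed in
registration order. A standalone sketch of the same strategy, with
hypothetical directories in place of QEMU's configured ones:

    #include <glib.h>
    #include <unistd.h>

    static const char *data_dirs[] = { "/usr/local/share/demo",
                                       "/usr/share/demo" };

    static char *find_file(const char *name)
    {
        if (access(name, R_OK) == 0) {
            return g_strdup(name);          /* straight path first */
        }
        for (size_t i = 0; i < G_N_ELEMENTS(data_dirs); i++) {
            char *buf = g_strdup_printf("%s/%s", data_dirs[i], name);
            if (access(buf, R_OK) == 0) {
                return buf;                 /* first readable match wins */
            }
            g_free(buf);
        }
        return NULL;
    }

    int main(int argc, char **argv)
    {
        char *path = argc > 1 ? find_file(argv[1]) : NULL;

        g_print("%s\n", path ? path : "(not found)");
        g_free(path);
        return 0;
    }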
diff --git a/system/device_tree.c b/system/device_tree.c
new file mode 100644
index 0000000..eb5166c
--- /dev/null
+++ b/system/device_tree.c
@@ -0,0 +1,703 @@
+/*
+ * Functions to help device tree manipulation using libfdt.
+ * It also provides functions to read entries from the device tree proc
+ * interface.
+ *
+ * Copyright 2008 IBM Corporation.
+ * Authors: Jerone Young <jyoung5@us.ibm.com>
+ * Hollis Blanchard <hollisb@us.ibm.com>
+ *
+ * This work is licensed under the GNU GPL license version 2 or later.
+ *
+ */
+
+#include "qemu/osdep.h"
+
+#ifdef CONFIG_LINUX
+#include <dirent.h>
+#endif
+
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/option.h"
+#include "qemu/bswap.h"
+#include "qemu/cutils.h"
+#include "qemu/guest-random.h"
+#include "sysemu/device_tree.h"
+#include "hw/loader.h"
+#include "hw/boards.h"
+#include "qemu/config-file.h"
+#include "qapi/qapi-commands-machine.h"
+#include "qapi/qmp/qdict.h"
+#include "monitor/hmp.h"
+
+#include <libfdt.h>
+
+#define FDT_MAX_SIZE 0x100000
+
+void *create_device_tree(int *sizep)
+{
+ void *fdt;
+ int ret;
+
+ *sizep = FDT_MAX_SIZE;
+ fdt = g_malloc0(FDT_MAX_SIZE);
+ ret = fdt_create(fdt, FDT_MAX_SIZE);
+ if (ret < 0) {
+ goto fail;
+ }
+ ret = fdt_finish_reservemap(fdt);
+ if (ret < 0) {
+ goto fail;
+ }
+ ret = fdt_begin_node(fdt, "");
+ if (ret < 0) {
+ goto fail;
+ }
+ ret = fdt_end_node(fdt);
+ if (ret < 0) {
+ goto fail;
+ }
+ ret = fdt_finish(fdt);
+ if (ret < 0) {
+ goto fail;
+ }
+ ret = fdt_open_into(fdt, fdt, *sizep);
+ if (ret) {
+ error_report("%s: Unable to copy device tree into memory: %s",
+ __func__, fdt_strerror(ret));
+ exit(1);
+ }
+
+ return fdt;
+fail:
+ error_report("%s Couldn't create dt: %s", __func__, fdt_strerror(ret));
+ exit(1);
+}
+
+void *load_device_tree(const char *filename_path, int *sizep)
+{
+ int dt_size;
+ int dt_file_load_size;
+ int ret;
+ void *fdt = NULL;
+
+ *sizep = 0;
+ dt_size = get_image_size(filename_path);
+ if (dt_size < 0) {
+ error_report("Unable to get size of device tree file '%s'",
+ filename_path);
+ goto fail;
+ }
+ if (dt_size > INT_MAX / 2 - 10000) {
+ error_report("Device tree file '%s' is too large", filename_path);
+ goto fail;
+ }
+
+ /* Expand to 2x size to give enough room for manipulation. */
+ dt_size += 10000;
+ dt_size *= 2;
+ /* First allocate space in qemu for device tree */
+ fdt = g_malloc0(dt_size);
+
+ dt_file_load_size = load_image_size(filename_path, fdt, dt_size);
+ if (dt_file_load_size < 0) {
+ error_report("Unable to open device tree file '%s'",
+ filename_path);
+ goto fail;
+ }
+
+ ret = fdt_open_into(fdt, fdt, dt_size);
+ if (ret) {
+ error_report("%s: Unable to copy device tree into memory: %s",
+ __func__, fdt_strerror(ret));
+ goto fail;
+ }
+
+ /* Check sanity of device tree */
+ if (fdt_check_header(fdt)) {
+ error_report("Device tree file loaded into memory is invalid: %s",
+ filename_path);
+ goto fail;
+ }
+ *sizep = dt_size;
+ return fdt;
+
+fail:
+ g_free(fdt);
+ return NULL;
+}
+
+#ifdef CONFIG_LINUX
+
+#define SYSFS_DT_BASEDIR "/proc/device-tree"
+
+/**
+ * read_fstree: this function is inspired by dtc's read_fstree
+ * @fdt: preallocated fdt blob buffer, to be populated
+ * @dirname: directory to scan under SYSFS_DT_BASEDIR
+ * The search is recursive and the tree is searched down to the
+ * leaves (property files).
+ *
+ * The function exits in case of error.
+ */
+static void read_fstree(void *fdt, const char *dirname)
+{
+ DIR *d;
+ struct dirent *de;
+ struct stat st;
+ const char *root_dir = SYSFS_DT_BASEDIR;
+ const char *parent_node;
+
+ if (strstr(dirname, root_dir) != dirname) {
+ error_report("%s: %s must be searched within %s",
+ __func__, dirname, root_dir);
+ exit(1);
+ }
+ parent_node = &dirname[strlen(SYSFS_DT_BASEDIR)];
+
+ d = opendir(dirname);
+ if (!d) {
+ error_report("%s cannot open %s", __func__, dirname);
+ exit(1);
+ }
+
+ while ((de = readdir(d)) != NULL) {
+ char *tmpnam;
+
+ if (!g_strcmp0(de->d_name, ".")
+ || !g_strcmp0(de->d_name, "..")) {
+ continue;
+ }
+
+ tmpnam = g_strdup_printf("%s/%s", dirname, de->d_name);
+
+ if (lstat(tmpnam, &st) < 0) {
+ error_report("%s cannot lstat %s", __func__, tmpnam);
+ exit(1);
+ }
+
+ if (S_ISREG(st.st_mode)) {
+ gchar *val;
+ gsize len;
+
+ if (!g_file_get_contents(tmpnam, &val, &len, NULL)) {
+ error_report("%s not able to extract info from %s",
+ __func__, tmpnam);
+ exit(1);
+ }
+
+ if (strlen(parent_node) > 0) {
+ qemu_fdt_setprop(fdt, parent_node,
+ de->d_name, val, len);
+ } else {
+ qemu_fdt_setprop(fdt, "/", de->d_name, val, len);
+ }
+ g_free(val);
+ } else if (S_ISDIR(st.st_mode)) {
+ char *node_name;
+
+ node_name = g_strdup_printf("%s/%s",
+ parent_node, de->d_name);
+ qemu_fdt_add_subnode(fdt, node_name);
+ g_free(node_name);
+ read_fstree(fdt, tmpnam);
+ }
+
+ g_free(tmpnam);
+ }
+
+ closedir(d);
+}
+
+/* load_device_tree_from_sysfs: extract the dt blob from host sysfs */
+void *load_device_tree_from_sysfs(void)
+{
+ void *host_fdt;
+ int host_fdt_size;
+
+ host_fdt = create_device_tree(&host_fdt_size);
+ read_fstree(host_fdt, SYSFS_DT_BASEDIR);
+ if (fdt_check_header(host_fdt)) {
+ error_report("%s host device tree extracted into memory is invalid",
+ __func__);
+ exit(1);
+ }
+ return host_fdt;
+}
+
+#endif /* CONFIG_LINUX */
+
+static int findnode_nofail(void *fdt, const char *node_path)
+{
+ int offset;
+
+ offset = fdt_path_offset(fdt, node_path);
+ if (offset < 0) {
+ error_report("%s Couldn't find node %s: %s", __func__, node_path,
+ fdt_strerror(offset));
+ exit(1);
+ }
+
+ return offset;
+}
+
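+/*
+ * Return a NULL-terminated array of paths to nodes whose name is
+ * exactly @name or "name@unit". E.g. (illustrative) with /fragment@0
+ * and /fragment@1 in the tree, qemu_fdt_node_unit_path(fdt, "fragment",
+ * &err) yields { "/fragment@0", "/fragment@1", NULL }; the caller owns
+ * the array and its strings.
+ */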
+char **qemu_fdt_node_unit_path(void *fdt, const char *name, Error **errp)
+{
+ char *prefix = g_strdup_printf("%s@", name);
+ unsigned int path_len = 16, n = 0;
+ GSList *path_list = NULL, *iter;
+ const char *iter_name;
+ int offset, len, ret;
+ char **path_array;
+
+ offset = fdt_next_node(fdt, -1, NULL);
+
+ while (offset >= 0) {
+ iter_name = fdt_get_name(fdt, offset, &len);
+ if (!iter_name) {
+ offset = len;
+ break;
+ }
+ if (!strcmp(iter_name, name) || g_str_has_prefix(iter_name, prefix)) {
+ char *path;
+
+ path = g_malloc(path_len);
+ while ((ret = fdt_get_path(fdt, offset, path, path_len))
+ == -FDT_ERR_NOSPACE) {
+ path_len += 16;
+ path = g_realloc(path, path_len);
+ }
+ path_list = g_slist_prepend(path_list, path);
+ n++;
+ }
+ offset = fdt_next_node(fdt, offset, NULL);
+ }
+ g_free(prefix);
+
+ if (offset < 0 && offset != -FDT_ERR_NOTFOUND) {
+ error_setg(errp, "%s: abort parsing dt for %s node units: %s",
+ __func__, name, fdt_strerror(offset));
+ for (iter = path_list; iter; iter = iter->next) {
+ g_free(iter->data);
+ }
+ g_slist_free(path_list);
+ return NULL;
+ }
+
+ path_array = g_new(char *, n + 1);
+ path_array[n--] = NULL;
+
+ for (iter = path_list; iter; iter = iter->next) {
+ path_array[n--] = iter->data;
+ }
+
+ g_slist_free(path_list);
+
+ return path_array;
+}
+
+char **qemu_fdt_node_path(void *fdt, const char *name, const char *compat,
+ Error **errp)
+{
+ int offset, len, ret;
+ const char *iter_name;
+ unsigned int path_len = 16, n = 0;
+ GSList *path_list = NULL, *iter;
+ char **path_array;
+
+ offset = fdt_node_offset_by_compatible(fdt, -1, compat);
+
+ while (offset >= 0) {
+ iter_name = fdt_get_name(fdt, offset, &len);
+ if (!iter_name) {
+ offset = len;
+ break;
+ }
+ if (!name || !strcmp(iter_name, name)) {
+ char *path;
+
+ path = g_malloc(path_len);
+ while ((ret = fdt_get_path(fdt, offset, path, path_len))
+ == -FDT_ERR_NOSPACE) {
+ path_len += 16;
+ path = g_realloc(path, path_len);
+ }
+ path_list = g_slist_prepend(path_list, path);
+ n++;
+ }
+ offset = fdt_node_offset_by_compatible(fdt, offset, compat);
+ }
+
+ if (offset < 0 && offset != -FDT_ERR_NOTFOUND) {
+ error_setg(errp, "%s: abort parsing dt for %s/%s: %s",
+ __func__, name, compat, fdt_strerror(offset));
+ for (iter = path_list; iter; iter = iter->next) {
+ g_free(iter->data);
+ }
+ g_slist_free(path_list);
+ return NULL;
+ }
+
+ path_array = g_new(char *, n + 1);
+ path_array[n--] = NULL;
+
+ for (iter = path_list; iter; iter = iter->next) {
+ path_array[n--] = iter->data;
+ }
+
+ g_slist_free(path_list);
+
+ return path_array;
+}
+
+int qemu_fdt_setprop(void *fdt, const char *node_path,
+ const char *property, const void *val, int size)
+{
+ int r;
+
+ r = fdt_setprop(fdt, findnode_nofail(fdt, node_path), property, val, size);
+ if (r < 0) {
+ error_report("%s: Couldn't set %s/%s: %s", __func__, node_path,
+ property, fdt_strerror(r));
+ exit(1);
+ }
+
+ return r;
+}
+
+int qemu_fdt_setprop_cell(void *fdt, const char *node_path,
+ const char *property, uint32_t val)
+{
+ int r;
+
+ r = fdt_setprop_cell(fdt, findnode_nofail(fdt, node_path), property, val);
+ if (r < 0) {
+ error_report("%s: Couldn't set %s/%s = %#08x: %s", __func__,
+ node_path, property, val, fdt_strerror(r));
+ exit(1);
+ }
+
+ return r;
+}
+
+int qemu_fdt_setprop_u64(void *fdt, const char *node_path,
+ const char *property, uint64_t val)
+{
+ val = cpu_to_be64(val);
+ return qemu_fdt_setprop(fdt, node_path, property, &val, sizeof(val));
+}
+
+int qemu_fdt_setprop_string(void *fdt, const char *node_path,
+ const char *property, const char *string)
+{
+ int r;
+
+ r = fdt_setprop_string(fdt, findnode_nofail(fdt, node_path), property, string);
+ if (r < 0) {
+ error_report("%s: Couldn't set %s/%s = %s: %s", __func__,
+ node_path, property, string, fdt_strerror(r));
+ exit(1);
+ }
+
+ return r;
+}
+
+/*
+ * libfdt doesn't allow us to add string arrays directly, but they are
+ * just a series of NUL-terminated strings packed together with a total
+ * length. We build the string up here so we can calculate the final
+ * length.
+ */
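+/* E.g. (illustrative) the array {"foo", "ab"} is packed as "foo\0ab\0", 7 bytes. */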
+int qemu_fdt_setprop_string_array(void *fdt, const char *node_path,
+ const char *prop, char **array, int len)
+{
+ int ret, i, total_len = 0;
+ char *str, *p;
+ for (i = 0; i < len; i++) {
+ total_len += strlen(array[i]) + 1;
+ }
+ p = str = g_malloc0(total_len);
+ for (i = 0; i < len; i++) {
+ int offset = strlen(array[i]) + 1;
+ pstrcpy(p, offset, array[i]);
+ p += offset;
+ }
+
+ ret = qemu_fdt_setprop(fdt, node_path, prop, str, total_len);
+ g_free(str);
+ return ret;
+}
+
+const void *qemu_fdt_getprop(void *fdt, const char *node_path,
+ const char *property, int *lenp, Error **errp)
+{
+ int len;
+ const void *r;
+
+ if (!lenp) {
+ lenp = &len;
+ }
+ r = fdt_getprop(fdt, findnode_nofail(fdt, node_path), property, lenp);
+ if (!r) {
+ error_setg(errp, "%s: Couldn't get %s/%s: %s", __func__,
+ node_path, property, fdt_strerror(*lenp));
+ }
+ return r;
+}
+
+uint32_t qemu_fdt_getprop_cell(void *fdt, const char *node_path,
+ const char *property, int *lenp, Error **errp)
+{
+ int len;
+ const uint32_t *p;
+
+ if (!lenp) {
+ lenp = &len;
+ }
+ p = qemu_fdt_getprop(fdt, node_path, property, lenp, errp);
+ if (!p) {
+ return 0;
+ } else if (*lenp != 4) {
+ error_setg(errp, "%s: %s/%s not 4 bytes long (not a cell?)",
+ __func__, node_path, property);
+ *lenp = -EINVAL;
+ return 0;
+ }
+ return be32_to_cpu(*p);
+}
+
+uint32_t qemu_fdt_get_phandle(void *fdt, const char *path)
+{
+ uint32_t r;
+
+ r = fdt_get_phandle(fdt, findnode_nofail(fdt, path));
+ if (r == 0) {
+ error_report("%s: Couldn't get phandle for %s: %s", __func__,
+ path, fdt_strerror(r));
+ exit(1);
+ }
+
+ return r;
+}
+
+int qemu_fdt_setprop_phandle(void *fdt, const char *node_path,
+ const char *property,
+ const char *target_node_path)
+{
+ uint32_t phandle = qemu_fdt_get_phandle(fdt, target_node_path);
+ return qemu_fdt_setprop_cell(fdt, node_path, property, phandle);
+}
+
+uint32_t qemu_fdt_alloc_phandle(void *fdt)
+{
+ static int phandle = 0x0;
+
+ /*
+ * We need to find out if the user gave us special instruction at
+ * which phandle id to start allocating phandles.
+ */
+ if (!phandle) {
+ phandle = machine_phandle_start(current_machine);
+ }
+
+ if (!phandle) {
+ /*
+ * None or invalid phandle given on the command line, so fall back to
+ * default starting point.
+ */
+ phandle = 0x8000;
+ }
+
+ return phandle++;
+}
+
+int qemu_fdt_nop_node(void *fdt, const char *node_path)
+{
+ int r;
+
+ r = fdt_nop_node(fdt, findnode_nofail(fdt, node_path));
+ if (r < 0) {
+ error_report("%s: Couldn't nop node %s: %s", __func__, node_path,
+ fdt_strerror(r));
+ exit(1);
+ }
+
+ return r;
+}
+
+int qemu_fdt_add_subnode(void *fdt, const char *name)
+{
+ char *dupname = g_strdup(name);
+ char *basename = strrchr(dupname, '/');
+ int retval;
+ int parent = 0;
+
+ if (!basename) {
+ g_free(dupname);
+ return -1;
+ }
+
+ basename[0] = '\0';
+ basename++;
+
+ if (dupname[0]) {
+ parent = findnode_nofail(fdt, dupname);
+ }
+
+ retval = fdt_add_subnode(fdt, parent, basename);
+ if (retval < 0) {
+ error_report("%s: Failed to create subnode %s: %s",
+ __func__, name, fdt_strerror(retval));
+ exit(1);
+ }
+
+ g_free(dupname);
+ return retval;
+}
+
+/*
+ * qemu_fdt_add_path: Like qemu_fdt_add_subnode(), but will add
+ * all missing subnodes from the given path.
+ */
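+/* E.g. (illustrative) adding "/soc/uart@1000" also creates /soc if missing. */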
+int qemu_fdt_add_path(void *fdt, const char *path)
+{
+ const char *name;
+ int namelen, retval;
+ int parent = 0;
+
+ if (path[0] != '/') {
+ return -1;
+ }
+
+ do {
+ name = path + 1;
+ path = strchr(name, '/');
+ namelen = path != NULL ? path - name : strlen(name);
+
+ retval = fdt_subnode_offset_namelen(fdt, parent, name, namelen);
+ if (retval < 0 && retval != -FDT_ERR_NOTFOUND) {
+ error_report("%s: Unexpected error in finding subnode %.*s: %s",
+ __func__, namelen, name, fdt_strerror(retval));
+ exit(1);
+ } else if (retval == -FDT_ERR_NOTFOUND) {
+ retval = fdt_add_subnode_namelen(fdt, parent, name, namelen);
+ if (retval < 0) {
+ error_report("%s: Failed to create subnode %.*s: %s",
+ __func__, namelen, name, fdt_strerror(retval));
+ exit(1);
+ }
+ }
+
+ parent = retval;
+ } while (path);
+
+ return retval;
+}
+
+void qemu_fdt_dumpdtb(void *fdt, int size)
+{
+ const char *dumpdtb = current_machine->dumpdtb;
+
+ if (dumpdtb) {
+ /* Dump the dtb to a file and quit */
+ if (g_file_set_contents(dumpdtb, fdt, size, NULL)) {
+ info_report("dtb dumped to %s. Exiting.", dumpdtb);
+ exit(0);
+ }
+ error_report("%s: Failed dumping dtb to %s", __func__, dumpdtb);
+ exit(1);
+ }
+}
+
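+/*
+ * @values holds (ncells, value) pairs: each entry's cell count (1 or 2)
+ * followed by the value itself. E.g. (illustrative) a two-cell address
+ * and a one-cell size are passed as {2, addr, 1, size}.
+ */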
+int qemu_fdt_setprop_sized_cells_from_array(void *fdt,
+ const char *node_path,
+ const char *property,
+ int numvalues,
+ uint64_t *values)
+{
+ uint32_t *propcells;
+ uint64_t value;
+ int cellnum, vnum, ncells;
+ uint32_t hival;
+ int ret;
+
+ propcells = g_new0(uint32_t, numvalues * 2);
+
+ cellnum = 0;
+ for (vnum = 0; vnum < numvalues; vnum++) {
+ ncells = values[vnum * 2];
+ if (ncells != 1 && ncells != 2) {
+ ret = -1;
+ goto out;
+ }
+ value = values[vnum * 2 + 1];
+ hival = cpu_to_be32(value >> 32);
+ if (ncells > 1) {
+ propcells[cellnum++] = hival;
+ } else if (hival != 0) {
+ ret = -1;
+ goto out;
+ }
+ propcells[cellnum++] = cpu_to_be32(value);
+ }
+
+ ret = qemu_fdt_setprop(fdt, node_path, property, propcells,
+ cellnum * sizeof(uint32_t));
+out:
+ g_free(propcells);
+ return ret;
+}
+
+void qmp_dumpdtb(const char *filename, Error **errp)
+{
+ g_autoptr(GError) err = NULL;
+ uint32_t size;
+
+ if (!current_machine->fdt) {
+ error_setg(errp, "This machine doesn't have a FDT");
+ return;
+ }
+
+ size = fdt_totalsize(current_machine->fdt);
+
+ g_assert(size > 0);
+
+ if (!g_file_set_contents(filename, current_machine->fdt, size, &err)) {
+ error_setg(errp, "Error saving FDT to file %s: %s",
+ filename, err->message);
+ }
+}
+
+void hmp_dumpdtb(Monitor *mon, const QDict *qdict)
+{
+ const char *filename = qdict_get_str(qdict, "filename");
+ Error *local_err = NULL;
+
+ qmp_dumpdtb(filename, &local_err);
+
+ if (hmp_handle_error(mon, local_err)) {
+ return;
+ }
+
+ info_report("dtb dumped to %s", filename);
+}
+
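+/*
+ * Walk every node and refill each "rng-seed" property with fresh random
+ * bytes, e.g. so that a reset or snapshot load does not replay a stale
+ * seed.
+ */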
+void qemu_fdt_randomize_seeds(void *fdt)
+{
+ int noffset, poffset, len;
+ const char *name;
+ uint8_t *data;
+
+ for (noffset = fdt_next_node(fdt, 0, NULL);
+ noffset >= 0;
+ noffset = fdt_next_node(fdt, noffset, NULL)) {
+ for (poffset = fdt_first_property_offset(fdt, noffset);
+ poffset >= 0;
+ poffset = fdt_next_property_offset(fdt, poffset)) {
+ data = (uint8_t *)fdt_getprop_by_offset(fdt, poffset, &name, &len);
+            if (!data || strcmp(name, "rng-seed")) {
+                continue;
+            }
+ qemu_guest_getrandom_nofail(data, len);
+ }
+ }
+}
diff --git a/system/dirtylimit.c b/system/dirtylimit.c
new file mode 100644
index 0000000..fa959d7
--- /dev/null
+++ b/system/dirtylimit.c
@@ -0,0 +1,678 @@
+/*
+ * Dirty page rate limit implementation code
+ *
+ * Copyright (c) 2022 CHINA TELECOM CO.,LTD.
+ *
+ * Authors:
+ * Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "qapi/qapi-commands-migration.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/error.h"
+#include "sysemu/dirtyrate.h"
+#include "sysemu/dirtylimit.h"
+#include "monitor/hmp.h"
+#include "monitor/monitor.h"
+#include "exec/memory.h"
+#include "exec/target_page.h"
+#include "hw/boards.h"
+#include "sysemu/kvm.h"
+#include "trace.h"
+#include "migration/misc.h"
+#include "migration/migration.h"
+#include "migration/options.h"
+
+/*
+ * Dirtylimit stops adjusting the throttle once the dirty page rate
+ * error drops below DIRTYLIMIT_TOLERANCE_RANGE.
+ */
+#define DIRTYLIMIT_TOLERANCE_RANGE 25 /* MB/s */
+/*
+ * Increase or decrease the vcpu sleep time linearly if the dirty
+ * page rate error percentage is over
+ * DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT; otherwise, adjust the sleep
+ * time by a fixed step.
+ */
+#define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT 50
+/*
+ * Maximum vcpu sleep time percentage during a cycle
+ * composed of dirty ring full time and sleep time.
+ */
+#define DIRTYLIMIT_THROTTLE_PCT_MAX 99
+
+struct {
+ VcpuStat stat;
+ bool running;
+ QemuThread thread;
+} *vcpu_dirty_rate_stat;
+
+typedef struct VcpuDirtyLimitState {
+ int cpu_index;
+ bool enabled;
+    /*
+     * Quota dirty page rate in MB/s;
+     * zero if not enabled.
+     */
+ uint64_t quota;
+} VcpuDirtyLimitState;
+
+struct {
+ VcpuDirtyLimitState *states;
+    /* Maximum number of cpus configured by the user */
+    int max_cpus;
+    /* Number of vcpus under dirtylimit */
+    int limited_nvcpu;
+} *dirtylimit_state;
+
+/* protect dirtylimit_state */
+static QemuMutex dirtylimit_mutex;
+
+/* dirtylimit thread quit if dirtylimit_quit is true */
+static bool dirtylimit_quit;
+
+static void vcpu_dirty_rate_stat_collect(void)
+{
+ MigrationState *s = migrate_get_current();
+ VcpuStat stat;
+ int i = 0;
+ int64_t period = DIRTYLIMIT_CALC_TIME_MS;
+
+ if (migrate_dirty_limit() &&
+ migration_is_active(s)) {
+ period = s->parameters.x_vcpu_dirty_limit_period;
+ }
+
+ /* calculate vcpu dirtyrate */
+ vcpu_calculate_dirtyrate(period,
+ &stat,
+ GLOBAL_DIRTY_LIMIT,
+ false);
+
+ for (i = 0; i < stat.nvcpu; i++) {
+ vcpu_dirty_rate_stat->stat.rates[i].id = i;
+ vcpu_dirty_rate_stat->stat.rates[i].dirty_rate =
+ stat.rates[i].dirty_rate;
+ }
+
+ g_free(stat.rates);
+}
+
+static void *vcpu_dirty_rate_stat_thread(void *opaque)
+{
+ rcu_register_thread();
+
+ /* start log sync */
+ global_dirty_log_change(GLOBAL_DIRTY_LIMIT, true);
+
+ while (qatomic_read(&vcpu_dirty_rate_stat->running)) {
+ vcpu_dirty_rate_stat_collect();
+ if (dirtylimit_in_service()) {
+ dirtylimit_process();
+ }
+ }
+
+ /* stop log sync */
+ global_dirty_log_change(GLOBAL_DIRTY_LIMIT, false);
+
+ rcu_unregister_thread();
+ return NULL;
+}
+
+int64_t vcpu_dirty_rate_get(int cpu_index)
+{
+ DirtyRateVcpu *rates = vcpu_dirty_rate_stat->stat.rates;
+ return qatomic_read_i64(&rates[cpu_index].dirty_rate);
+}
+
+void vcpu_dirty_rate_stat_start(void)
+{
+ if (qatomic_read(&vcpu_dirty_rate_stat->running)) {
+ return;
+ }
+
+ qatomic_set(&vcpu_dirty_rate_stat->running, 1);
+ qemu_thread_create(&vcpu_dirty_rate_stat->thread,
+ "dirtyrate-stat",
+ vcpu_dirty_rate_stat_thread,
+ NULL,
+ QEMU_THREAD_JOINABLE);
+}
+
+void vcpu_dirty_rate_stat_stop(void)
+{
+ qatomic_set(&vcpu_dirty_rate_stat->running, 0);
+ dirtylimit_state_unlock();
+ qemu_mutex_unlock_iothread();
+ qemu_thread_join(&vcpu_dirty_rate_stat->thread);
+ qemu_mutex_lock_iothread();
+ dirtylimit_state_lock();
+}
+
+void vcpu_dirty_rate_stat_initialize(void)
+{
+ MachineState *ms = MACHINE(qdev_get_machine());
+ int max_cpus = ms->smp.max_cpus;
+
+ vcpu_dirty_rate_stat =
+ g_malloc0(sizeof(*vcpu_dirty_rate_stat));
+
+ vcpu_dirty_rate_stat->stat.nvcpu = max_cpus;
+ vcpu_dirty_rate_stat->stat.rates =
+ g_new0(DirtyRateVcpu, max_cpus);
+
+ vcpu_dirty_rate_stat->running = false;
+}
+
+void vcpu_dirty_rate_stat_finalize(void)
+{
+ g_free(vcpu_dirty_rate_stat->stat.rates);
+ vcpu_dirty_rate_stat->stat.rates = NULL;
+
+ g_free(vcpu_dirty_rate_stat);
+ vcpu_dirty_rate_stat = NULL;
+}
+
+void dirtylimit_state_lock(void)
+{
+ qemu_mutex_lock(&dirtylimit_mutex);
+}
+
+void dirtylimit_state_unlock(void)
+{
+ qemu_mutex_unlock(&dirtylimit_mutex);
+}
+
+static void
+__attribute__((__constructor__)) dirtylimit_mutex_init(void)
+{
+ qemu_mutex_init(&dirtylimit_mutex);
+}
+
+static inline VcpuDirtyLimitState *dirtylimit_vcpu_get_state(int cpu_index)
+{
+ return &dirtylimit_state->states[cpu_index];
+}
+
+void dirtylimit_state_initialize(void)
+{
+ MachineState *ms = MACHINE(qdev_get_machine());
+ int max_cpus = ms->smp.max_cpus;
+ int i;
+
+ dirtylimit_state = g_malloc0(sizeof(*dirtylimit_state));
+
+ dirtylimit_state->states =
+ g_new0(VcpuDirtyLimitState, max_cpus);
+
+ for (i = 0; i < max_cpus; i++) {
+ dirtylimit_state->states[i].cpu_index = i;
+ }
+
+ dirtylimit_state->max_cpus = max_cpus;
+ trace_dirtylimit_state_initialize(max_cpus);
+}
+
+void dirtylimit_state_finalize(void)
+{
+ g_free(dirtylimit_state->states);
+ dirtylimit_state->states = NULL;
+
+ g_free(dirtylimit_state);
+ dirtylimit_state = NULL;
+
+ trace_dirtylimit_state_finalize();
+}
+
+bool dirtylimit_in_service(void)
+{
+ return !!dirtylimit_state;
+}
+
+bool dirtylimit_vcpu_index_valid(int cpu_index)
+{
+ MachineState *ms = MACHINE(qdev_get_machine());
+
+ return !(cpu_index < 0 ||
+ cpu_index >= ms->smp.max_cpus);
+}
+
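+/*
+ * Estimate, in microseconds, how long a vcpu takes to fill its dirty
+ * ring: the ring size in MiB divided by the highest dirty page rate
+ * (MB/s) observed so far.
+ */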
+static uint64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
+{
+ static uint64_t max_dirtyrate;
+ uint64_t dirty_ring_size_MiB;
+
+ dirty_ring_size_MiB = qemu_target_pages_to_MiB(kvm_dirty_ring_size());
+
+ if (max_dirtyrate < dirtyrate) {
+ max_dirtyrate = dirtyrate;
+ }
+
+ return dirty_ring_size_MiB * 1000000 / max_dirtyrate;
+}
+
+static inline bool dirtylimit_done(uint64_t quota,
+ uint64_t current)
+{
+ uint64_t min, max;
+
+ min = MIN(quota, current);
+ max = MAX(quota, current);
+
+    return (max - min) <= DIRTYLIMIT_TOLERANCE_RANGE;
+}
+
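+/*
+ * Worked example (illustrative numbers): with quota = 100 MB/s and
+ * current = 300 MB/s, the error is (300 - 100) * 100 / 300 = 66%,
+ * above DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT (50), so the sleep time is
+ * adjusted linearly rather than by a fixed step.
+ */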
+static inline bool
+dirtylimit_need_linear_adjustment(uint64_t quota,
+ uint64_t current)
+{
+ uint64_t min, max;
+
+ min = MIN(quota, current);
+ max = MAX(quota, current);
+
+ return ((max - min) * 100 / max) > DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT;
+}
+
+static void dirtylimit_set_throttle(CPUState *cpu,
+ uint64_t quota,
+ uint64_t current)
+{
+ int64_t ring_full_time_us = 0;
+ uint64_t sleep_pct = 0;
+ uint64_t throttle_us = 0;
+
+ if (current == 0) {
+ cpu->throttle_us_per_full = 0;
+ return;
+ }
+
+ ring_full_time_us = dirtylimit_dirty_ring_full_time(current);
+
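+    /*
+     * If the vcpu should sleep for sleep_pct% of each ring-full/sleep
+     * cycle, the sleep per ring-full event works out to
+     * ring_full_time_us * sleep_pct / (100 - sleep_pct).
+     */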
+ if (dirtylimit_need_linear_adjustment(quota, current)) {
+ if (quota < current) {
+ sleep_pct = (current - quota) * 100 / current;
+ throttle_us =
+ ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
+ cpu->throttle_us_per_full += throttle_us;
+ } else {
+ sleep_pct = (quota - current) * 100 / quota;
+ throttle_us =
+ ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
+ cpu->throttle_us_per_full -= throttle_us;
+ }
+
+ trace_dirtylimit_throttle_pct(cpu->cpu_index,
+ sleep_pct,
+ throttle_us);
+ } else {
+ if (quota < current) {
+ cpu->throttle_us_per_full += ring_full_time_us / 10;
+ } else {
+ cpu->throttle_us_per_full -= ring_full_time_us / 10;
+ }
+ }
+
+    /*
+     * TODO: with a large kvm_dirty_ring_size (e.g. 65536), the current
+     * dirty page rate may never reach the quota; should we stop
+     * increasing the sleep time in that case?
+     */
+ cpu->throttle_us_per_full = MIN(cpu->throttle_us_per_full,
+ ring_full_time_us * DIRTYLIMIT_THROTTLE_PCT_MAX);
+
+ cpu->throttle_us_per_full = MAX(cpu->throttle_us_per_full, 0);
+}
+
+static void dirtylimit_adjust_throttle(CPUState *cpu)
+{
+ uint64_t quota = 0;
+ uint64_t current = 0;
+ int cpu_index = cpu->cpu_index;
+
+ quota = dirtylimit_vcpu_get_state(cpu_index)->quota;
+ current = vcpu_dirty_rate_get(cpu_index);
+
+ if (!dirtylimit_done(quota, current)) {
+ dirtylimit_set_throttle(cpu, quota, current);
+    }
+}
+
+void dirtylimit_process(void)
+{
+ CPUState *cpu;
+
+ if (!qatomic_read(&dirtylimit_quit)) {
+ dirtylimit_state_lock();
+
+ if (!dirtylimit_in_service()) {
+ dirtylimit_state_unlock();
+ return;
+ }
+
+ CPU_FOREACH(cpu) {
+ if (!dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) {
+ continue;
+ }
+ dirtylimit_adjust_throttle(cpu);
+ }
+ dirtylimit_state_unlock();
+ }
+}
+
+void dirtylimit_change(bool start)
+{
+ if (start) {
+ qatomic_set(&dirtylimit_quit, 0);
+ } else {
+ qatomic_set(&dirtylimit_quit, 1);
+ }
+}
+
+void dirtylimit_set_vcpu(int cpu_index,
+ uint64_t quota,
+ bool enable)
+{
+ trace_dirtylimit_set_vcpu(cpu_index, quota);
+
+ if (enable) {
+ dirtylimit_state->states[cpu_index].quota = quota;
+ if (!dirtylimit_vcpu_get_state(cpu_index)->enabled) {
+ dirtylimit_state->limited_nvcpu++;
+ }
+ } else {
+ dirtylimit_state->states[cpu_index].quota = 0;
+ if (dirtylimit_state->states[cpu_index].enabled) {
+ dirtylimit_state->limited_nvcpu--;
+ }
+ }
+
+ dirtylimit_state->states[cpu_index].enabled = enable;
+}
+
+void dirtylimit_set_all(uint64_t quota,
+ bool enable)
+{
+ MachineState *ms = MACHINE(qdev_get_machine());
+ int max_cpus = ms->smp.max_cpus;
+ int i;
+
+ for (i = 0; i < max_cpus; i++) {
+ dirtylimit_set_vcpu(i, quota, enable);
+ }
+}
+
+void dirtylimit_vcpu_execute(CPUState *cpu)
+{
+ if (dirtylimit_in_service() &&
+ dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled &&
+ cpu->throttle_us_per_full) {
+ trace_dirtylimit_vcpu_execute(cpu->cpu_index,
+ cpu->throttle_us_per_full);
+ usleep(cpu->throttle_us_per_full);
+ }
+}
+
+static void dirtylimit_init(void)
+{
+ dirtylimit_state_initialize();
+ dirtylimit_change(true);
+ vcpu_dirty_rate_stat_initialize();
+ vcpu_dirty_rate_stat_start();
+}
+
+static void dirtylimit_cleanup(void)
+{
+ vcpu_dirty_rate_stat_stop();
+ vcpu_dirty_rate_stat_finalize();
+ dirtylimit_change(false);
+ dirtylimit_state_finalize();
+}
+
+/*
+ * The dirty page rate limit may not be set or cancelled while
+ * migration is running with the dirty-limit capability enabled.
+ */
+static bool dirtylimit_is_allowed(void)
+{
+ MigrationState *ms = migrate_get_current();
+
+ if (migration_is_running(ms->state) &&
+ (!qemu_thread_is_self(&ms->thread)) &&
+ migrate_dirty_limit() &&
+ dirtylimit_in_service()) {
+ return false;
+ }
+ return true;
+}
+
+void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index,
+ int64_t cpu_index,
+ Error **errp)
+{
+ if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
+ return;
+ }
+
+ if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
+ error_setg(errp, "incorrect cpu index specified");
+ return;
+ }
+
+ if (!dirtylimit_is_allowed()) {
+ error_setg(errp, "can't cancel dirty page rate limit while"
+ " migration is running");
+ return;
+ }
+
+ if (!dirtylimit_in_service()) {
+ return;
+ }
+
+ dirtylimit_state_lock();
+
+ if (has_cpu_index) {
+ dirtylimit_set_vcpu(cpu_index, 0, false);
+ } else {
+ dirtylimit_set_all(0, false);
+ }
+
+ if (!dirtylimit_state->limited_nvcpu) {
+ dirtylimit_cleanup();
+ }
+
+ dirtylimit_state_unlock();
+}
+
+void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
+{
+ int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
+ Error *err = NULL;
+
+ qmp_cancel_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, &err);
+ if (err) {
+ hmp_handle_error(mon, err);
+ return;
+ }
+
+ monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query "
+ "dirty limit for virtual CPU]\n");
+}
+
+void qmp_set_vcpu_dirty_limit(bool has_cpu_index,
+ int64_t cpu_index,
+ uint64_t dirty_rate,
+ Error **errp)
+{
+    if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
+        error_setg(errp, "dirty page limit feature requires KVM with"
+                   " accelerator property 'dirty-ring-size' set");
+        return;
+    }
+
+ if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
+ error_setg(errp, "incorrect cpu index specified");
+ return;
+ }
+
+ if (!dirtylimit_is_allowed()) {
+ error_setg(errp, "can't set dirty page rate limit while"
+ " migration is running");
+ return;
+ }
+
+ if (!dirty_rate) {
+ qmp_cancel_vcpu_dirty_limit(has_cpu_index, cpu_index, errp);
+ return;
+ }
+
+ dirtylimit_state_lock();
+
+ if (!dirtylimit_in_service()) {
+ dirtylimit_init();
+ }
+
+ if (has_cpu_index) {
+ dirtylimit_set_vcpu(cpu_index, dirty_rate, true);
+ } else {
+ dirtylimit_set_all(dirty_rate, true);
+ }
+
+ dirtylimit_state_unlock();
+}
+
+void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
+{
+ int64_t dirty_rate = qdict_get_int(qdict, "dirty_rate");
+ int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
+ Error *err = NULL;
+
+ if (dirty_rate < 0) {
+ error_setg(&err, "invalid dirty page limit %" PRId64, dirty_rate);
+ goto out;
+ }
+
+ qmp_set_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, dirty_rate, &err);
+
+out:
+ hmp_handle_error(mon, err);
+}
+
+/* Return the maximum throttle time across all virtual CPUs */
+uint64_t dirtylimit_throttle_time_per_round(void)
+{
+ CPUState *cpu;
+ int64_t max = 0;
+
+ CPU_FOREACH(cpu) {
+ if (cpu->throttle_us_per_full > max) {
+ max = cpu->throttle_us_per_full;
+ }
+ }
+
+ return max;
+}
+
+/*
+ * Estimate the average dirty ring full time of each virtual CPU.
+ * Return 0 if the guest doesn't dirty memory.
+ */
+uint64_t dirtylimit_ring_full_time(void)
+{
+ CPUState *cpu;
+ uint64_t curr_rate = 0;
+ int nvcpus = 0;
+
+ CPU_FOREACH(cpu) {
+ if (cpu->running) {
+ nvcpus++;
+ curr_rate += vcpu_dirty_rate_get(cpu->cpu_index);
+ }
+ }
+
+ if (!curr_rate || !nvcpus) {
+ return 0;
+ }
+
+ return dirtylimit_dirty_ring_full_time(curr_rate / nvcpus);
+}
+
+static struct DirtyLimitInfo *dirtylimit_query_vcpu(int cpu_index)
+{
+ DirtyLimitInfo *info = NULL;
+
+ info = g_malloc0(sizeof(*info));
+ info->cpu_index = cpu_index;
+ info->limit_rate = dirtylimit_vcpu_get_state(cpu_index)->quota;
+ info->current_rate = vcpu_dirty_rate_get(cpu_index);
+
+ return info;
+}
+
+static struct DirtyLimitInfoList *dirtylimit_query_all(void)
+{
+ int i, index;
+ DirtyLimitInfo *info = NULL;
+ DirtyLimitInfoList *head = NULL, **tail = &head;
+
+ dirtylimit_state_lock();
+
+ if (!dirtylimit_in_service()) {
+ dirtylimit_state_unlock();
+ return NULL;
+ }
+
+ for (i = 0; i < dirtylimit_state->max_cpus; i++) {
+ index = dirtylimit_state->states[i].cpu_index;
+ if (dirtylimit_vcpu_get_state(index)->enabled) {
+ info = dirtylimit_query_vcpu(index);
+ QAPI_LIST_APPEND(tail, info);
+ }
+ }
+
+ dirtylimit_state_unlock();
+
+ return head;
+}
+
+struct DirtyLimitInfoList *qmp_query_vcpu_dirty_limit(Error **errp)
+{
+ if (!dirtylimit_in_service()) {
+ return NULL;
+ }
+
+ return dirtylimit_query_all();
+}
+
+void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
+{
+ DirtyLimitInfoList *info;
+ g_autoptr(DirtyLimitInfoList) head = NULL;
+ Error *err = NULL;
+
+ if (!dirtylimit_in_service()) {
+ monitor_printf(mon, "Dirty page limit not enabled!\n");
+ return;
+ }
+
+ head = qmp_query_vcpu_dirty_limit(&err);
+ if (err) {
+ hmp_handle_error(mon, err);
+ return;
+ }
+
+ for (info = head; info != NULL; info = info->next) {
+ monitor_printf(mon, "vcpu[%"PRIi64"], limit rate %"PRIi64 " (MB/s),"
+ " current rate %"PRIi64 " (MB/s)\n",
+ info->value->cpu_index,
+ info->value->limit_rate,
+ info->value->current_rate);
+ }
+}
diff --git a/system/dma-helpers.c b/system/dma-helpers.c
new file mode 100644
index 0000000..36211ac
--- /dev/null
+++ b/system/dma-helpers.c
@@ -0,0 +1,347 @@
+/*
+ * DMA helper functions
+ *
+ * Copyright (c) 2009,2020 Red Hat
+ *
+ * This work is licensed under the terms of the GNU General Public License
+ * (GNU GPL), version 2 or later.
+ */
+
+#include "qemu/osdep.h"
+#include "sysemu/block-backend.h"
+#include "sysemu/dma.h"
+#include "trace/trace-root.h"
+#include "qemu/thread.h"
+#include "qemu/main-loop.h"
+#include "sysemu/cpu-timers.h"
+#include "qemu/range.h"
+
+/* #define DEBUG_IOMMU */
+
+MemTxResult dma_memory_set(AddressSpace *as, dma_addr_t addr,
+ uint8_t c, dma_addr_t len, MemTxAttrs attrs)
+{
+ dma_barrier(as, DMA_DIRECTION_FROM_DEVICE);
+
+ return address_space_set(as, addr, c, len, attrs);
+}
+
+void qemu_sglist_init(QEMUSGList *qsg, DeviceState *dev, int alloc_hint,
+ AddressSpace *as)
+{
+ qsg->sg = g_new(ScatterGatherEntry, alloc_hint);
+ qsg->nsg = 0;
+ qsg->nalloc = alloc_hint;
+ qsg->size = 0;
+ qsg->as = as;
+ qsg->dev = dev;
+ object_ref(OBJECT(dev));
+}
+
+void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len)
+{
+ if (qsg->nsg == qsg->nalloc) {
+ qsg->nalloc = 2 * qsg->nalloc + 1;
+ qsg->sg = g_renew(ScatterGatherEntry, qsg->sg, qsg->nalloc);
+ }
+ qsg->sg[qsg->nsg].base = base;
+ qsg->sg[qsg->nsg].len = len;
+ qsg->size += len;
+ ++qsg->nsg;
+}
+
+void qemu_sglist_destroy(QEMUSGList *qsg)
+{
+ object_unref(OBJECT(qsg->dev));
+ g_free(qsg->sg);
+ memset(qsg, 0, sizeof(*qsg));
+}
+
+typedef struct {
+ BlockAIOCB common;
+ AioContext *ctx;
+ BlockAIOCB *acb;
+ QEMUSGList *sg;
+ uint32_t align;
+ uint64_t offset;
+ DMADirection dir;
+ int sg_cur_index;
+ dma_addr_t sg_cur_byte;
+ QEMUIOVector iov;
+ QEMUBH *bh;
+ DMAIOFunc *io_func;
+ void *io_func_opaque;
+} DMAAIOCB;
+
+static void dma_blk_cb(void *opaque, int ret);
+
+static void reschedule_dma(void *opaque)
+{
+ DMAAIOCB *dbs = (DMAAIOCB *)opaque;
+
+ assert(!dbs->acb && dbs->bh);
+ qemu_bh_delete(dbs->bh);
+ dbs->bh = NULL;
+ dma_blk_cb(dbs, 0);
+}
+
+static void dma_blk_unmap(DMAAIOCB *dbs)
+{
+ int i;
+
+ for (i = 0; i < dbs->iov.niov; ++i) {
+ dma_memory_unmap(dbs->sg->as, dbs->iov.iov[i].iov_base,
+ dbs->iov.iov[i].iov_len, dbs->dir,
+ dbs->iov.iov[i].iov_len);
+ }
+ qemu_iovec_reset(&dbs->iov);
+}
+
+static void dma_complete(DMAAIOCB *dbs, int ret)
+{
+ trace_dma_complete(dbs, ret, dbs->common.cb);
+
+ assert(!dbs->acb && !dbs->bh);
+ dma_blk_unmap(dbs);
+ if (dbs->common.cb) {
+ dbs->common.cb(dbs->common.opaque, ret);
+ }
+ qemu_iovec_destroy(&dbs->iov);
+ qemu_aio_unref(dbs);
+}
+
+static void dma_blk_cb(void *opaque, int ret)
+{
+ DMAAIOCB *dbs = (DMAAIOCB *)opaque;
+ AioContext *ctx = dbs->ctx;
+ dma_addr_t cur_addr, cur_len;
+ void *mem;
+
+ trace_dma_blk_cb(dbs, ret);
+
+ aio_context_acquire(ctx);
+ dbs->acb = NULL;
+ dbs->offset += dbs->iov.size;
+
+ if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
+ dma_complete(dbs, ret);
+ goto out;
+ }
+ dma_blk_unmap(dbs);
+
+ while (dbs->sg_cur_index < dbs->sg->nsg) {
+ cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
+ cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
+ mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir,
+ MEMTXATTRS_UNSPECIFIED);
+        /*
+         * Make reads deterministic in icount mode. Windows sometimes
+         * issues disk read requests with overlapping SGs. This leads to
+         * non-determinism, because the resulting buffer contents may be
+         * mixed from several sectors. This code splits the SGs into
+         * groups such that the SGs in each group do not overlap.
+         */
+ if (mem && icount_enabled() && dbs->dir == DMA_DIRECTION_FROM_DEVICE) {
+ int i;
+ for (i = 0 ; i < dbs->iov.niov ; ++i) {
+ if (ranges_overlap((intptr_t)dbs->iov.iov[i].iov_base,
+ dbs->iov.iov[i].iov_len, (intptr_t)mem,
+ cur_len)) {
+ dma_memory_unmap(dbs->sg->as, mem, cur_len,
+ dbs->dir, cur_len);
+ mem = NULL;
+ break;
+ }
+ }
+ }
+        if (!mem) {
+            break;
+        }
+ qemu_iovec_add(&dbs->iov, mem, cur_len);
+ dbs->sg_cur_byte += cur_len;
+ if (dbs->sg_cur_byte == dbs->sg->sg[dbs->sg_cur_index].len) {
+ dbs->sg_cur_byte = 0;
+ ++dbs->sg_cur_index;
+ }
+ }
+
+ if (dbs->iov.size == 0) {
+ trace_dma_map_wait(dbs);
+ dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs);
+ cpu_register_map_client(dbs->bh);
+ goto out;
+ }
+
+ if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) {
+ qemu_iovec_discard_back(&dbs->iov,
+ QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align));
+ }
+
+ dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
+ dma_blk_cb, dbs, dbs->io_func_opaque);
+ assert(dbs->acb);
+out:
+ aio_context_release(ctx);
+}
+
+static void dma_aio_cancel(BlockAIOCB *acb)
+{
+ DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common);
+
+ trace_dma_aio_cancel(dbs);
+
+ assert(!(dbs->acb && dbs->bh));
+ if (dbs->acb) {
+ /* This will invoke dma_blk_cb. */
+ blk_aio_cancel_async(dbs->acb);
+ return;
+ }
+
+ if (dbs->bh) {
+ cpu_unregister_map_client(dbs->bh);
+ qemu_bh_delete(dbs->bh);
+ dbs->bh = NULL;
+ }
+ if (dbs->common.cb) {
+ dbs->common.cb(dbs->common.opaque, -ECANCELED);
+ }
+}
+
+static const AIOCBInfo dma_aiocb_info = {
+ .aiocb_size = sizeof(DMAAIOCB),
+ .cancel_async = dma_aio_cancel,
+};
+
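+/*
+ * Start a scatter/gather block transfer: map as much of the sg list as
+ * possible, issue io_func on the mapped vector, and keep resuming from
+ * dma_blk_cb() until the whole list has been transferred.
+ */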
+BlockAIOCB *dma_blk_io(AioContext *ctx,
+ QEMUSGList *sg, uint64_t offset, uint32_t align,
+ DMAIOFunc *io_func, void *io_func_opaque,
+ BlockCompletionFunc *cb,
+ void *opaque, DMADirection dir)
+{
+ DMAAIOCB *dbs = qemu_aio_get(&dma_aiocb_info, NULL, cb, opaque);
+
+ trace_dma_blk_io(dbs, io_func_opaque, offset, (dir == DMA_DIRECTION_TO_DEVICE));
+
+ dbs->acb = NULL;
+ dbs->sg = sg;
+ dbs->ctx = ctx;
+ dbs->offset = offset;
+ dbs->align = align;
+ dbs->sg_cur_index = 0;
+ dbs->sg_cur_byte = 0;
+ dbs->dir = dir;
+ dbs->io_func = io_func;
+ dbs->io_func_opaque = io_func_opaque;
+ dbs->bh = NULL;
+ qemu_iovec_init(&dbs->iov, sg->nsg);
+ dma_blk_cb(dbs, 0);
+ return &dbs->common;
+}
+
+
+static
+BlockAIOCB *dma_blk_read_io_func(int64_t offset, QEMUIOVector *iov,
+ BlockCompletionFunc *cb, void *cb_opaque,
+ void *opaque)
+{
+ BlockBackend *blk = opaque;
+ return blk_aio_preadv(blk, offset, iov, 0, cb, cb_opaque);
+}
+
+BlockAIOCB *dma_blk_read(BlockBackend *blk,
+ QEMUSGList *sg, uint64_t offset, uint32_t align,
+ void (*cb)(void *opaque, int ret), void *opaque)
+{
+ return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
+ dma_blk_read_io_func, blk, cb, opaque,
+ DMA_DIRECTION_FROM_DEVICE);
+}
+
+static
+BlockAIOCB *dma_blk_write_io_func(int64_t offset, QEMUIOVector *iov,
+ BlockCompletionFunc *cb, void *cb_opaque,
+ void *opaque)
+{
+ BlockBackend *blk = opaque;
+ return blk_aio_pwritev(blk, offset, iov, 0, cb, cb_opaque);
+}
+
+BlockAIOCB *dma_blk_write(BlockBackend *blk,
+ QEMUSGList *sg, uint64_t offset, uint32_t align,
+ void (*cb)(void *opaque, int ret), void *opaque)
+{
+ return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
+ dma_blk_write_io_func, blk, cb, opaque,
+ DMA_DIRECTION_TO_DEVICE);
+}
+
+
+static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual,
+ QEMUSGList *sg, DMADirection dir,
+ MemTxAttrs attrs)
+{
+ uint8_t *ptr = buf;
+ dma_addr_t xresidual;
+ int sg_cur_index;
+ MemTxResult res = MEMTX_OK;
+
+ xresidual = sg->size;
+ sg_cur_index = 0;
+ len = MIN(len, xresidual);
+ while (len > 0) {
+ ScatterGatherEntry entry = sg->sg[sg_cur_index++];
+ dma_addr_t xfer = MIN(len, entry.len);
+ res |= dma_memory_rw(sg->as, entry.base, ptr, xfer, dir, attrs);
+ ptr += xfer;
+ len -= xfer;
+ xresidual -= xfer;
+ }
+
+ if (residual) {
+ *residual = xresidual;
+ }
+ return res;
+}
+
+MemTxResult dma_buf_read(void *ptr, dma_addr_t len, dma_addr_t *residual,
+ QEMUSGList *sg, MemTxAttrs attrs)
+{
+ return dma_buf_rw(ptr, len, residual, sg, DMA_DIRECTION_FROM_DEVICE, attrs);
+}
+
+MemTxResult dma_buf_write(void *ptr, dma_addr_t len, dma_addr_t *residual,
+ QEMUSGList *sg, MemTxAttrs attrs)
+{
+ return dma_buf_rw(ptr, len, residual, sg, DMA_DIRECTION_TO_DEVICE, attrs);
+}
+
+void dma_acct_start(BlockBackend *blk, BlockAcctCookie *cookie,
+ QEMUSGList *sg, enum BlockAcctType type)
+{
+ block_acct_start(blk_get_stats(blk), cookie, sg->size, type);
+}
+
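+/*
+ * Return the largest power-of-two-minus-one mask usable at @start
+ * without crossing @end or exceeding @max_addr_bits. E.g. (illustrative)
+ * start 0x1000 and end 0x3000 yield 0xfff: a naturally aligned 4 KiB
+ * chunk at 0x1000.
+ */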
+uint64_t dma_aligned_pow2_mask(uint64_t start, uint64_t end, int max_addr_bits)
+{
+ uint64_t max_mask = UINT64_MAX, addr_mask = end - start;
+ uint64_t alignment_mask, size_mask;
+
+ if (max_addr_bits != 64) {
+ max_mask = (1ULL << max_addr_bits) - 1;
+ }
+
+ alignment_mask = start ? (start & -start) - 1 : max_mask;
+ alignment_mask = MIN(alignment_mask, max_mask);
+ size_mask = MIN(addr_mask, max_mask);
+
+ if (alignment_mask <= size_mask) {
+ /* Increase the alignment of start */
+ return alignment_mask;
+ } else {
+ /* Find the largest page mask from size */
+ if (addr_mask == UINT64_MAX) {
+ return UINT64_MAX;
+ }
+ return (1ULL << (63 - clz64(addr_mask + 1))) - 1;
+ }
+}
+
diff --git a/system/globals.c b/system/globals.c
new file mode 100644
index 0000000..e83b542
--- /dev/null
+++ b/system/globals.c
@@ -0,0 +1,70 @@
+/*
+ * Global variables that (mostly) should not exist
+ *
+ * Copyright (c) 2003-2020 QEMU contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "exec/cpu-common.h"
+#include "hw/display/vga.h"
+#include "hw/loader.h"
+#include "hw/xen/xen.h"
+#include "net/net.h"
+#include "sysemu/cpus.h"
+#include "sysemu/sysemu.h"
+
+enum vga_retrace_method vga_retrace_method = VGA_RETRACE_DUMB;
+int display_opengl;
+const char *keyboard_layout;
+bool enable_mlock;
+bool enable_cpu_pm;
+int nb_nics;
+NICInfo nd_table[MAX_NICS];
+int autostart = 1;
+int vga_interface_type = VGA_NONE;
+bool vga_interface_created;
+Chardev *parallel_hds[MAX_PARALLEL_PORTS];
+int win2k_install_hack;
+int fd_bootchk = 1;
+int graphic_rotate;
+QEMUOptionRom option_rom[MAX_OPTION_ROMS];
+int nb_option_roms;
+int old_param;
+const char *qemu_name;
+unsigned int nb_prom_envs;
+const char *prom_envs[MAX_PROM_ENVS];
+uint8_t *boot_splash_filedata;
+int only_migratable; /* turn it off unless user states otherwise */
+int icount_align_option;
+
+/* The bytes in qemu_uuid are in the order specified by RFC4122, _not_ in the
+ * little-endian "wire format" described in the SMBIOS 2.6 specification.
+ */
+QemuUUID qemu_uuid;
+bool qemu_uuid_set;
+
+uint32_t xen_domid;
+enum xen_mode xen_mode = XEN_DISABLED;
+bool xen_domid_restrict;
+struct evtchn_backend_ops *xen_evtchn_ops;
+struct gnttab_backend_ops *xen_gnttab_ops;
+struct foreignmem_backend_ops *xen_foreignmem_ops;
+struct xenstore_backend_ops *xen_xenstore_ops;
diff --git a/system/ioport.c b/system/ioport.c
new file mode 100644
index 0000000..1824aa8
--- /dev/null
+++ b/system/ioport.c
@@ -0,0 +1,346 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+/*
+ * Split out ioport-related code from vl.c.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "exec/ioport.h"
+#include "exec/memory.h"
+#include "exec/address-spaces.h"
+#include "trace.h"
+
+struct MemoryRegionPortioList {
+ Object obj;
+
+ MemoryRegion mr;
+ void *portio_opaque;
+ MemoryRegionPortio *ports;
+};
+
+#define TYPE_MEMORY_REGION_PORTIO_LIST "memory-region-portio-list"
+OBJECT_DECLARE_SIMPLE_TYPE(MemoryRegionPortioList, MEMORY_REGION_PORTIO_LIST)
+
+static uint64_t unassigned_io_read(void *opaque, hwaddr addr, unsigned size)
+{
+ return -1ULL;
+}
+
+static void unassigned_io_write(void *opaque, hwaddr addr, uint64_t val,
+ unsigned size)
+{
+}
+
+const MemoryRegionOps unassigned_io_ops = {
+ .read = unassigned_io_read,
+ .write = unassigned_io_write,
+ .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+void cpu_outb(uint32_t addr, uint8_t val)
+{
+ trace_cpu_out(addr, 'b', val);
+ address_space_write(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED,
+ &val, 1);
+}
+
+void cpu_outw(uint32_t addr, uint16_t val)
+{
+ uint8_t buf[2];
+
+ trace_cpu_out(addr, 'w', val);
+ stw_p(buf, val);
+ address_space_write(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED,
+ buf, 2);
+}
+
+void cpu_outl(uint32_t addr, uint32_t val)
+{
+ uint8_t buf[4];
+
+ trace_cpu_out(addr, 'l', val);
+ stl_p(buf, val);
+ address_space_write(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED,
+ buf, 4);
+}
+
+uint8_t cpu_inb(uint32_t addr)
+{
+ uint8_t val;
+
+ address_space_read(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED,
+ &val, 1);
+ trace_cpu_in(addr, 'b', val);
+ return val;
+}
+
+uint16_t cpu_inw(uint32_t addr)
+{
+ uint8_t buf[2];
+ uint16_t val;
+
+ address_space_read(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED, buf, 2);
+ val = lduw_p(buf);
+ trace_cpu_in(addr, 'w', val);
+ return val;
+}
+
+uint32_t cpu_inl(uint32_t addr)
+{
+ uint8_t buf[4];
+ uint32_t val;
+
+ address_space_read(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED, buf, 4);
+ val = ldl_p(buf);
+ trace_cpu_in(addr, 'l', val);
+ return val;
+}
+
+void portio_list_init(PortioList *piolist,
+ Object *owner,
+ const MemoryRegionPortio *callbacks,
+ void *opaque, const char *name)
+{
+ unsigned n = 0;
+
+ while (callbacks[n].size) {
+ ++n;
+ }
+
+ piolist->ports = callbacks;
+ piolist->nr = 0;
+ piolist->regions = g_new0(MemoryRegion *, n);
+ piolist->address_space = NULL;
+ piolist->opaque = opaque;
+ piolist->owner = owner;
+ piolist->name = name;
+ piolist->flush_coalesced_mmio = false;
+}
+
+void portio_list_set_flush_coalesced(PortioList *piolist)
+{
+ piolist->flush_coalesced_mmio = true;
+}
+
+void portio_list_destroy(PortioList *piolist)
+{
+ MemoryRegionPortioList *mrpio;
+ unsigned i;
+
+ for (i = 0; i < piolist->nr; ++i) {
+ mrpio = container_of(piolist->regions[i], MemoryRegionPortioList, mr);
+ object_unparent(OBJECT(&mrpio->mr));
+ object_unref(mrpio);
+ }
+ g_free(piolist->regions);
+}
+
+static const MemoryRegionPortio *find_portio(MemoryRegionPortioList *mrpio,
+ uint64_t offset, unsigned size,
+ bool write)
+{
+ const MemoryRegionPortio *mrp;
+
+ for (mrp = mrpio->ports; mrp->size; ++mrp) {
+ if (offset >= mrp->offset && offset < mrp->offset + mrp->len &&
+ size == mrp->size &&
+ (write ? (bool)mrp->write : (bool)mrp->read)) {
+ return mrp;
+ }
+ }
+ return NULL;
+}
+
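+/*
+ * A 16-bit access with no matching 16-bit handler is split into two
+ * byte accesses; a missing high byte reads back as 0xff.
+ */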
+static uint64_t portio_read(void *opaque, hwaddr addr, unsigned size)
+{
+ MemoryRegionPortioList *mrpio = opaque;
+ const MemoryRegionPortio *mrp = find_portio(mrpio, addr, size, false);
+ uint64_t data;
+
+ data = ((uint64_t)1 << (size * 8)) - 1;
+ if (mrp) {
+ data = mrp->read(mrpio->portio_opaque, mrp->base + addr);
+ } else if (size == 2) {
+ mrp = find_portio(mrpio, addr, 1, false);
+ if (mrp) {
+ data = mrp->read(mrpio->portio_opaque, mrp->base + addr);
+ if (addr + 1 < mrp->offset + mrp->len) {
+ data |= mrp->read(mrpio->portio_opaque, mrp->base + addr + 1) << 8;
+ } else {
+ data |= 0xff00;
+ }
+ }
+ }
+ return data;
+}
+
+static void portio_write(void *opaque, hwaddr addr, uint64_t data,
+ unsigned size)
+{
+ MemoryRegionPortioList *mrpio = opaque;
+ const MemoryRegionPortio *mrp = find_portio(mrpio, addr, size, true);
+
+ if (mrp) {
+ mrp->write(mrpio->portio_opaque, mrp->base + addr, data);
+ } else if (size == 2) {
+ mrp = find_portio(mrpio, addr, 1, true);
+ if (mrp) {
+ mrp->write(mrpio->portio_opaque, mrp->base + addr, data & 0xff);
+ if (addr + 1 < mrp->offset + mrp->len) {
+ mrp->write(mrpio->portio_opaque, mrp->base + addr + 1, data >> 8);
+ }
+ }
+ }
+}
+
+static const MemoryRegionOps portio_ops = {
+ .read = portio_read,
+ .write = portio_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .valid.unaligned = true,
+ .impl.unaligned = true,
+};
+
+static void portio_list_add_1(PortioList *piolist,
+ const MemoryRegionPortio *pio_init,
+ unsigned count, unsigned start,
+ unsigned off_low, unsigned off_high)
+{
+ MemoryRegionPortioList *mrpio;
+ Object *owner;
+ char *name;
+ unsigned i;
+
+ /* Copy the sub-list and null-terminate it. */
+ mrpio = MEMORY_REGION_PORTIO_LIST(
+ object_new(TYPE_MEMORY_REGION_PORTIO_LIST));
+ mrpio->portio_opaque = piolist->opaque;
+ mrpio->ports = g_malloc0(sizeof(MemoryRegionPortio) * (count + 1));
+ memcpy(mrpio->ports, pio_init, sizeof(MemoryRegionPortio) * count);
+ memset(mrpio->ports + count, 0, sizeof(MemoryRegionPortio));
+
+ /* Adjust the offsets to all be zero-based for the region. */
+ for (i = 0; i < count; ++i) {
+ mrpio->ports[i].offset -= off_low;
+ mrpio->ports[i].base = start + off_low;
+ }
+
+ /*
+ * The MemoryRegion owner is the MemoryRegionPortioList since that manages
+ * the lifecycle via the refcount
+ */
+ memory_region_init_io(&mrpio->mr, OBJECT(mrpio), &portio_ops, mrpio,
+ piolist->name, off_high - off_low);
+
+ /* Reparent the MemoryRegion to the piolist owner */
+ object_ref(&mrpio->mr);
+ object_unparent(OBJECT(&mrpio->mr));
+ if (!piolist->owner) {
+ owner = container_get(qdev_get_machine(), "/unattached");
+ } else {
+ owner = piolist->owner;
+ }
+ name = g_strdup_printf("%s[*]", piolist->name);
+ object_property_add_child(owner, name, OBJECT(&mrpio->mr));
+ g_free(name);
+
+ if (piolist->flush_coalesced_mmio) {
+ memory_region_set_flush_coalesced(&mrpio->mr);
+ }
+ memory_region_add_subregion(piolist->address_space,
+ start + off_low, &mrpio->mr);
+ piolist->regions[piolist->nr] = &mrpio->mr;
+ ++piolist->nr;
+}
+
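+/*
+ * Walk the (sorted) port list and register one MemoryRegion per
+ * contiguous run of entries, starting a new region whenever a hole in
+ * the offsets is found.
+ */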
+void portio_list_add(PortioList *piolist,
+ MemoryRegion *address_space,
+ uint32_t start)
+{
+ const MemoryRegionPortio *pio, *pio_start = piolist->ports;
+ unsigned int off_low, off_high, off_last, count;
+
+ piolist->address_space = address_space;
+
+ /* Handle the first entry specially. */
+ off_last = off_low = pio_start->offset;
+ off_high = off_low + pio_start->len + pio_start->size - 1;
+ count = 1;
+
+ for (pio = pio_start + 1; pio->size != 0; pio++, count++) {
+ /* All entries must be sorted by offset. */
+ assert(pio->offset >= off_last);
+ off_last = pio->offset;
+
+ /* If we see a hole, break the region. */
+ if (off_last > off_high) {
+ portio_list_add_1(piolist, pio_start, count, start, off_low,
+ off_high);
+ /* ... and start collecting anew. */
+ pio_start = pio;
+ off_low = off_last;
+ off_high = off_low + pio->len + pio_start->size - 1;
+ count = 0;
+ } else if (off_last + pio->len > off_high) {
+ off_high = off_last + pio->len + pio_start->size - 1;
+ }
+ }
+
+ /* There will always be an open sub-list. */
+ portio_list_add_1(piolist, pio_start, count, start, off_low, off_high);
+}
+
+void portio_list_del(PortioList *piolist)
+{
+ MemoryRegionPortioList *mrpio;
+ unsigned i;
+
+ for (i = 0; i < piolist->nr; ++i) {
+ mrpio = container_of(piolist->regions[i], MemoryRegionPortioList, mr);
+ memory_region_del_subregion(piolist->address_space, &mrpio->mr);
+ }
+}
+
+static void memory_region_portio_list_finalize(Object *obj)
+{
+ MemoryRegionPortioList *mrpio = MEMORY_REGION_PORTIO_LIST(obj);
+
+ object_unref(&mrpio->mr);
+ g_free(mrpio->ports);
+}
+
+static const TypeInfo memory_region_portio_list_info = {
+ .parent = TYPE_OBJECT,
+ .name = TYPE_MEMORY_REGION_PORTIO_LIST,
+ .instance_size = sizeof(MemoryRegionPortioList),
+ .instance_finalize = memory_region_portio_list_finalize,
+};
+
+static void ioport_register_types(void)
+{
+ type_register_static(&memory_region_portio_list_info);
+}
+
+type_init(ioport_register_types)
diff --git a/system/main.c b/system/main.c
new file mode 100644
index 0000000..694388b
--- /dev/null
+++ b/system/main.c
@@ -0,0 +1,49 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2020 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-main.h"
+#include "sysemu/sysemu.h"
+
+#ifdef CONFIG_SDL
+#include <SDL.h>
+#endif
+
+int qemu_default_main(void)
+{
+ int status;
+
+ status = qemu_main_loop();
+ qemu_cleanup();
+
+ return status;
+}
+
+int (*qemu_main)(void) = qemu_default_main;
+
+int main(int argc, char **argv)
+{
+ qemu_init(argc, argv);
+ return qemu_main();
+}
diff --git a/system/memory.c b/system/memory.c
new file mode 100644
index 0000000..fa1c99f
--- /dev/null
+++ b/system/memory.c
@@ -0,0 +1,3683 @@
+/*
+ * Physical memory management
+ *
+ * Copyright 2011 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ * Avi Kivity <avi@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qapi/error.h"
+#include "exec/memory.h"
+#include "qapi/visitor.h"
+#include "qemu/bitops.h"
+#include "qemu/error-report.h"
+#include "qemu/main-loop.h"
+#include "qemu/qemu-print.h"
+#include "qom/object.h"
+#include "trace.h"
+
+#include "exec/memory-internal.h"
+#include "exec/ram_addr.h"
+#include "sysemu/kvm.h"
+#include "sysemu/runstate.h"
+#include "sysemu/tcg.h"
+#include "qemu/accel.h"
+#include "hw/boards.h"
+#include "migration/vmstate.h"
+#include "exec/address-spaces.h"
+
+//#define DEBUG_UNASSIGNED
+
+static unsigned memory_region_transaction_depth;
+static bool memory_region_update_pending;
+static bool ioeventfd_update_pending;
+unsigned int global_dirty_tracking;
+
+static QTAILQ_HEAD(, MemoryListener) memory_listeners
+ = QTAILQ_HEAD_INITIALIZER(memory_listeners);
+
+static QTAILQ_HEAD(, AddressSpace) address_spaces
+ = QTAILQ_HEAD_INITIALIZER(address_spaces);
+
+static GHashTable *flat_views;
+
+typedef struct AddrRange AddrRange;
+
+/*
+ * Note that signed integers are needed for negative offsetting in aliases
+ * (large MemoryRegion::alias_offset).
+ */
+struct AddrRange {
+ Int128 start;
+ Int128 size;
+};
+
+static AddrRange addrrange_make(Int128 start, Int128 size)
+{
+ return (AddrRange) { start, size };
+}
+
+static bool addrrange_equal(AddrRange r1, AddrRange r2)
+{
+ return int128_eq(r1.start, r2.start) && int128_eq(r1.size, r2.size);
+}
+
+static Int128 addrrange_end(AddrRange r)
+{
+ return int128_add(r.start, r.size);
+}
+
+static AddrRange addrrange_shift(AddrRange range, Int128 delta)
+{
+ int128_addto(&range.start, delta);
+ return range;
+}
+
+static bool addrrange_contains(AddrRange range, Int128 addr)
+{
+ return int128_ge(addr, range.start)
+ && int128_lt(addr, addrrange_end(range));
+}
+
+static bool addrrange_intersects(AddrRange r1, AddrRange r2)
+{
+ return addrrange_contains(r1, r2.start)
+ || addrrange_contains(r2, r1.start);
+}
+
+static AddrRange addrrange_intersection(AddrRange r1, AddrRange r2)
+{
+ Int128 start = int128_max(r1.start, r2.start);
+ Int128 end = int128_min(addrrange_end(r1), addrrange_end(r2));
+ return addrrange_make(start, int128_sub(end, start));
+}
+
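+/*
+ * Listener callbacks that add state are dispatched Forward and those
+ * that remove state Reverse, so that paired add/del callbacks nest
+ * correctly across listeners.
+ */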
+enum ListenerDirection { Forward, Reverse };
+
+#define MEMORY_LISTENER_CALL_GLOBAL(_callback, _direction, _args...) \
+ do { \
+ MemoryListener *_listener; \
+ \
+ switch (_direction) { \
+ case Forward: \
+ QTAILQ_FOREACH(_listener, &memory_listeners, link) { \
+ if (_listener->_callback) { \
+ _listener->_callback(_listener, ##_args); \
+ } \
+ } \
+ break; \
+ case Reverse: \
+ QTAILQ_FOREACH_REVERSE(_listener, &memory_listeners, link) { \
+ if (_listener->_callback) { \
+ _listener->_callback(_listener, ##_args); \
+ } \
+ } \
+ break; \
+ default: \
+ abort(); \
+ } \
+ } while (0)
+
+#define MEMORY_LISTENER_CALL(_as, _callback, _direction, _section, _args...) \
+ do { \
+ MemoryListener *_listener; \
+ \
+ switch (_direction) { \
+ case Forward: \
+ QTAILQ_FOREACH(_listener, &(_as)->listeners, link_as) { \
+ if (_listener->_callback) { \
+ _listener->_callback(_listener, _section, ##_args); \
+ } \
+ } \
+ break; \
+ case Reverse: \
+ QTAILQ_FOREACH_REVERSE(_listener, &(_as)->listeners, link_as) { \
+ if (_listener->_callback) { \
+ _listener->_callback(_listener, _section, ##_args); \
+ } \
+ } \
+ break; \
+ default: \
+ abort(); \
+ } \
+ } while (0)
+
+/* No need to ref/unref .mr, the FlatRange keeps it alive. */
+#define MEMORY_LISTENER_UPDATE_REGION(fr, as, dir, callback, _args...) \
+ do { \
+ MemoryRegionSection mrs = section_from_flat_range(fr, \
+ address_space_to_flatview(as)); \
+ MEMORY_LISTENER_CALL(as, callback, dir, &mrs, ##_args); \
+    } while (0)
+
+struct CoalescedMemoryRange {
+ AddrRange addr;
+ QTAILQ_ENTRY(CoalescedMemoryRange) link;
+};
+
+struct MemoryRegionIoeventfd {
+ AddrRange addr;
+ bool match_data;
+ uint64_t data;
+ EventNotifier *e;
+};
+
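+/* Order ioeventfds lexicographically by (start, size, match_data, data, e). */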
+static bool memory_region_ioeventfd_before(MemoryRegionIoeventfd *a,
+ MemoryRegionIoeventfd *b)
+{
+ if (int128_lt(a->addr.start, b->addr.start)) {
+ return true;
+ } else if (int128_gt(a->addr.start, b->addr.start)) {
+ return false;
+ } else if (int128_lt(a->addr.size, b->addr.size)) {
+ return true;
+ } else if (int128_gt(a->addr.size, b->addr.size)) {
+ return false;
+ } else if (a->match_data < b->match_data) {
+ return true;
+ } else if (a->match_data > b->match_data) {
+ return false;
+ } else if (a->match_data) {
+ if (a->data < b->data) {
+ return true;
+ } else if (a->data > b->data) {
+ return false;
+ }
+ }
+ if (a->e < b->e) {
+ return true;
+ } else if (a->e > b->e) {
+ return false;
+ }
+ return false;
+}
+
+static bool memory_region_ioeventfd_equal(MemoryRegionIoeventfd *a,
+ MemoryRegionIoeventfd *b)
+{
+ if (int128_eq(a->addr.start, b->addr.start) &&
+ (!int128_nz(a->addr.size) || !int128_nz(b->addr.size) ||
+ (int128_eq(a->addr.size, b->addr.size) &&
+ (a->match_data == b->match_data) &&
+ ((a->match_data && (a->data == b->data)) || !a->match_data) &&
+        (a->e == b->e)))) {
+        return true;
+    }
+
+    return false;
+}
+
+/* Range of memory in the global map. Addresses are absolute. */
+struct FlatRange {
+ MemoryRegion *mr;
+ hwaddr offset_in_region;
+ AddrRange addr;
+ uint8_t dirty_log_mask;
+ bool romd_mode;
+ bool readonly;
+ bool nonvolatile;
+};
+
+#define FOR_EACH_FLAT_RANGE(var, view) \
+ for (var = (view)->ranges; var < (view)->ranges + (view)->nr; ++var)
+
+static inline MemoryRegionSection
+section_from_flat_range(FlatRange *fr, FlatView *fv)
+{
+ return (MemoryRegionSection) {
+ .mr = fr->mr,
+ .fv = fv,
+ .offset_within_region = fr->offset_in_region,
+ .size = fr->addr.size,
+ .offset_within_address_space = int128_get64(fr->addr.start),
+ .readonly = fr->readonly,
+ .nonvolatile = fr->nonvolatile,
+ };
+}
+
+static bool flatrange_equal(FlatRange *a, FlatRange *b)
+{
+ return a->mr == b->mr
+ && addrrange_equal(a->addr, b->addr)
+ && a->offset_in_region == b->offset_in_region
+ && a->romd_mode == b->romd_mode
+ && a->readonly == b->readonly
+ && a->nonvolatile == b->nonvolatile;
+}
+
+static FlatView *flatview_new(MemoryRegion *mr_root)
+{
+ FlatView *view;
+
+ view = g_new0(FlatView, 1);
+ view->ref = 1;
+ view->root = mr_root;
+ memory_region_ref(mr_root);
+ trace_flatview_new(view, mr_root);
+
+ return view;
+}
+
+/* Insert a range into a given position. Caller is responsible for maintaining
+ * sorting order.
+ */
+static void flatview_insert(FlatView *view, unsigned pos, FlatRange *range)
+{
+ if (view->nr == view->nr_allocated) {
+ view->nr_allocated = MAX(2 * view->nr, 10);
+ view->ranges = g_realloc(view->ranges,
+ view->nr_allocated * sizeof(*view->ranges));
+ }
+ memmove(view->ranges + pos + 1, view->ranges + pos,
+ (view->nr - pos) * sizeof(FlatRange));
+ view->ranges[pos] = *range;
+ memory_region_ref(range->mr);
+ ++view->nr;
+}
+
+static void flatview_destroy(FlatView *view)
+{
+ int i;
+
+ trace_flatview_destroy(view, view->root);
+ if (view->dispatch) {
+ address_space_dispatch_free(view->dispatch);
+ }
+ for (i = 0; i < view->nr; i++) {
+ memory_region_unref(view->ranges[i].mr);
+ }
+ g_free(view->ranges);
+ memory_region_unref(view->root);
+ g_free(view);
+}
+
+static bool flatview_ref(FlatView *view)
+{
+ return qatomic_fetch_inc_nonzero(&view->ref) > 0;
+}
+
+void flatview_unref(FlatView *view)
+{
+ if (qatomic_fetch_dec(&view->ref) == 1) {
+ trace_flatview_destroy_rcu(view, view->root);
+ assert(view->root);
+ call_rcu(view, flatview_destroy, rcu);
+ }
+}
+
+static bool can_merge(FlatRange *r1, FlatRange *r2)
+{
+ return int128_eq(addrrange_end(r1->addr), r2->addr.start)
+ && r1->mr == r2->mr
+ && int128_eq(int128_add(int128_make64(r1->offset_in_region),
+ r1->addr.size),
+ int128_make64(r2->offset_in_region))
+ && r1->dirty_log_mask == r2->dirty_log_mask
+ && r1->romd_mode == r2->romd_mode
+ && r1->readonly == r2->readonly
+ && r1->nonvolatile == r2->nonvolatile;
+}
+
+/* Attempt to simplify a view by merging adjacent ranges */
+static void flatview_simplify(FlatView *view)
+{
+ unsigned i, j, k;
+
+ i = 0;
+ while (i < view->nr) {
+ j = i + 1;
+ while (j < view->nr
+ && can_merge(&view->ranges[j-1], &view->ranges[j])) {
+ int128_addto(&view->ranges[i].addr.size, view->ranges[j].addr.size);
+ ++j;
+ }
+ ++i;
+ for (k = i; k < j; k++) {
+ memory_region_unref(view->ranges[k].mr);
+ }
+ memmove(&view->ranges[i], &view->ranges[j],
+ (view->nr - j) * sizeof(view->ranges[j]));
+ view->nr -= j - i;
+ }
+}
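+
+/*
+ * For example (illustrative): two FlatRanges covering [0x0, 0x1000) and
+ * [0x1000, 0x2000) of the same MemoryRegion, with contiguous
+ * offset_in_region values and identical attributes, collapse into one
+ * FlatRange covering [0x0, 0x2000).
+ */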
+
+static bool memory_region_big_endian(MemoryRegion *mr)
+{
+#if TARGET_BIG_ENDIAN
+ return mr->ops->endianness != DEVICE_LITTLE_ENDIAN;
+#else
+ return mr->ops->endianness == DEVICE_BIG_ENDIAN;
+#endif
+}
+
+static void adjust_endianness(MemoryRegion *mr, uint64_t *data, MemOp op)
+{
+ if ((op & MO_BSWAP) != devend_memop(mr->ops->endianness)) {
+ switch (op & MO_SIZE) {
+ case MO_8:
+ break;
+ case MO_16:
+ *data = bswap16(*data);
+ break;
+ case MO_32:
+ *data = bswap32(*data);
+ break;
+ case MO_64:
+ *data = bswap64(*data);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ }
+}
+
+static inline void memory_region_shift_read_access(uint64_t *value,
+ signed shift,
+ uint64_t mask,
+ uint64_t tmp)
+{
+ if (shift >= 0) {
+ *value |= (tmp & mask) << shift;
+ } else {
+ *value |= (tmp & mask) >> -shift;
+ }
+}
+
+static inline uint64_t memory_region_shift_write_access(uint64_t *value,
+ signed shift,
+ uint64_t mask)
+{
+ uint64_t tmp;
+
+ if (shift >= 0) {
+ tmp = (*value >> shift) & mask;
+ } else {
+ tmp = (*value << -shift) & mask;
+ }
+
+ return tmp;
+}
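+
+/*
+ * Example (illustrative): a 4-byte little-endian write split into two
+ * 2-byte device accesses uses shifts of 0 and 16 with mask 0xffff, so
+ * each call extracts one 16-bit lane of *value; reads rebuild the value
+ * from the lanes via memory_region_shift_read_access() above.
+ */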
+
+static hwaddr memory_region_to_absolute_addr(MemoryRegion *mr, hwaddr offset)
+{
+ MemoryRegion *root;
+ hwaddr abs_addr = offset;
+
+ abs_addr += mr->addr;
+ for (root = mr; root->container; ) {
+ root = root->container;
+ abs_addr += root->addr;
+ }
+
+ return abs_addr;
+}
+
+static int get_cpu_index(void)
+{
+ if (current_cpu) {
+ return current_cpu->cpu_index;
+ }
+ return -1;
+}
+
+static MemTxResult memory_region_read_accessor(MemoryRegion *mr,
+ hwaddr addr,
+ uint64_t *value,
+ unsigned size,
+ signed shift,
+ uint64_t mask,
+ MemTxAttrs attrs)
+{
+ uint64_t tmp;
+
+ tmp = mr->ops->read(mr->opaque, addr, size);
+ if (mr->subpage) {
+ trace_memory_region_subpage_read(get_cpu_index(), mr, addr, tmp, size);
+ } else if (trace_event_get_state_backends(TRACE_MEMORY_REGION_OPS_READ)) {
+ hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr);
+ trace_memory_region_ops_read(get_cpu_index(), mr, abs_addr, tmp, size,
+ memory_region_name(mr));
+ }
+ memory_region_shift_read_access(value, shift, mask, tmp);
+ return MEMTX_OK;
+}
+
+static MemTxResult memory_region_read_with_attrs_accessor(MemoryRegion *mr,
+ hwaddr addr,
+ uint64_t *value,
+ unsigned size,
+ signed shift,
+ uint64_t mask,
+ MemTxAttrs attrs)
+{
+ uint64_t tmp = 0;
+ MemTxResult r;
+
+ r = mr->ops->read_with_attrs(mr->opaque, addr, &tmp, size, attrs);
+ if (mr->subpage) {
+ trace_memory_region_subpage_read(get_cpu_index(), mr, addr, tmp, size);
+ } else if (trace_event_get_state_backends(TRACE_MEMORY_REGION_OPS_READ)) {
+ hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr);
+ trace_memory_region_ops_read(get_cpu_index(), mr, abs_addr, tmp, size,
+ memory_region_name(mr));
+ }
+ memory_region_shift_read_access(value, shift, mask, tmp);
+ return r;
+}
+
+static MemTxResult memory_region_write_accessor(MemoryRegion *mr,
+ hwaddr addr,
+ uint64_t *value,
+ unsigned size,
+ signed shift,
+ uint64_t mask,
+ MemTxAttrs attrs)
+{
+ uint64_t tmp = memory_region_shift_write_access(value, shift, mask);
+
+ if (mr->subpage) {
+ trace_memory_region_subpage_write(get_cpu_index(), mr, addr, tmp, size);
+ } else if (trace_event_get_state_backends(TRACE_MEMORY_REGION_OPS_WRITE)) {
+ hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr);
+ trace_memory_region_ops_write(get_cpu_index(), mr, abs_addr, tmp, size,
+ memory_region_name(mr));
+ }
+ mr->ops->write(mr->opaque, addr, tmp, size);
+ return MEMTX_OK;
+}
+
+static MemTxResult memory_region_write_with_attrs_accessor(MemoryRegion *mr,
+ hwaddr addr,
+ uint64_t *value,
+ unsigned size,
+ signed shift,
+ uint64_t mask,
+ MemTxAttrs attrs)
+{
+ uint64_t tmp = memory_region_shift_write_access(value, shift, mask);
+
+ if (mr->subpage) {
+ trace_memory_region_subpage_write(get_cpu_index(), mr, addr, tmp, size);
+ } else if (trace_event_get_state_backends(TRACE_MEMORY_REGION_OPS_WRITE)) {
+ hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr);
+ trace_memory_region_ops_write(get_cpu_index(), mr, abs_addr, tmp, size,
+ memory_region_name(mr));
+ }
+ return mr->ops->write_with_attrs(mr->opaque, addr, tmp, size, attrs);
+}
+
+static MemTxResult access_with_adjusted_size(hwaddr addr,
+ uint64_t *value,
+ unsigned size,
+ unsigned access_size_min,
+ unsigned access_size_max,
+ MemTxResult (*access_fn)
+ (MemoryRegion *mr,
+ hwaddr addr,
+ uint64_t *value,
+ unsigned size,
+ signed shift,
+ uint64_t mask,
+ MemTxAttrs attrs),
+ MemoryRegion *mr,
+ MemTxAttrs attrs)
+{
+ uint64_t access_mask;
+ unsigned access_size;
+ unsigned i;
+ MemTxResult r = MEMTX_OK;
+ bool reentrancy_guard_applied = false;
+
+ if (!access_size_min) {
+ access_size_min = 1;
+ }
+ if (!access_size_max) {
+ access_size_max = 4;
+ }
+
+    /* Do not allow more than one simultaneous access to a device's I/O regions */
+ if (mr->dev && !mr->disable_reentrancy_guard &&
+ !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) {
+ if (mr->dev->mem_reentrancy_guard.engaged_in_io) {
+ warn_report_once("Blocked re-entrant IO on MemoryRegion: "
+ "%s at addr: 0x%" HWADDR_PRIX,
+ memory_region_name(mr), addr);
+ return MEMTX_ACCESS_ERROR;
+ }
+ mr->dev->mem_reentrancy_guard.engaged_in_io = true;
+ reentrancy_guard_applied = true;
+ }
+
+ /* FIXME: support unaligned access? */
+ access_size = MAX(MIN(size, access_size_max), access_size_min);
+ access_mask = MAKE_64BIT_MASK(0, access_size * 8);
+ if (memory_region_big_endian(mr)) {
+ for (i = 0; i < size; i += access_size) {
+ r |= access_fn(mr, addr + i, value, access_size,
+ (size - access_size - i) * 8, access_mask, attrs);
+ }
+ } else {
+ for (i = 0; i < size; i += access_size) {
+ r |= access_fn(mr, addr + i, value, access_size, i * 8,
+ access_mask, attrs);
+ }
+ }
+ if (mr->dev && reentrancy_guard_applied) {
+ mr->dev->mem_reentrancy_guard.engaged_in_io = false;
+ }
+ return r;
+}
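+
+/*
+ * For example (illustrative): an 8-byte access to a region whose
+ * implementation allows at most 4-byte accesses is issued as two 4-byte
+ * accesses; on a big-endian target the first access carries the most
+ * significant half (shift 32), on a little-endian target the least
+ * significant half (shift 0).
+ */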
+
+static AddressSpace *memory_region_to_address_space(MemoryRegion *mr)
+{
+ AddressSpace *as;
+
+ while (mr->container) {
+ mr = mr->container;
+ }
+ QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
+ if (mr == as->root) {
+ return as;
+ }
+ }
+ return NULL;
+}
+
+/* Render a memory region into the global view. Ranges in @view obscure
+ * ranges in @mr.
+ */
+static void render_memory_region(FlatView *view,
+ MemoryRegion *mr,
+ Int128 base,
+ AddrRange clip,
+ bool readonly,
+ bool nonvolatile)
+{
+ MemoryRegion *subregion;
+ unsigned i;
+ hwaddr offset_in_region;
+ Int128 remain;
+ Int128 now;
+ FlatRange fr;
+ AddrRange tmp;
+
+ if (!mr->enabled) {
+ return;
+ }
+
+ int128_addto(&base, int128_make64(mr->addr));
+ readonly |= mr->readonly;
+ nonvolatile |= mr->nonvolatile;
+
+ tmp = addrrange_make(base, mr->size);
+
+ if (!addrrange_intersects(tmp, clip)) {
+ return;
+ }
+
+ clip = addrrange_intersection(tmp, clip);
+
+ if (mr->alias) {
+ int128_subfrom(&base, int128_make64(mr->alias->addr));
+ int128_subfrom(&base, int128_make64(mr->alias_offset));
+ render_memory_region(view, mr->alias, base, clip,
+ readonly, nonvolatile);
+ return;
+ }
+
+ /* Render subregions in priority order. */
+ QTAILQ_FOREACH(subregion, &mr->subregions, subregions_link) {
+ render_memory_region(view, subregion, base, clip,
+ readonly, nonvolatile);
+ }
+
+ if (!mr->terminates) {
+ return;
+ }
+
+ offset_in_region = int128_get64(int128_sub(clip.start, base));
+ base = clip.start;
+ remain = clip.size;
+
+ fr.mr = mr;
+ fr.dirty_log_mask = memory_region_get_dirty_log_mask(mr);
+ fr.romd_mode = mr->romd_mode;
+ fr.readonly = readonly;
+ fr.nonvolatile = nonvolatile;
+
+ /* Render the region itself into any gaps left by the current view. */
+ for (i = 0; i < view->nr && int128_nz(remain); ++i) {
+ if (int128_ge(base, addrrange_end(view->ranges[i].addr))) {
+ continue;
+ }
+ if (int128_lt(base, view->ranges[i].addr.start)) {
+ now = int128_min(remain,
+ int128_sub(view->ranges[i].addr.start, base));
+ fr.offset_in_region = offset_in_region;
+ fr.addr = addrrange_make(base, now);
+ flatview_insert(view, i, &fr);
+ ++i;
+ int128_addto(&base, now);
+ offset_in_region += int128_get64(now);
+ int128_subfrom(&remain, now);
+ }
+ now = int128_sub(int128_min(int128_add(base, remain),
+ addrrange_end(view->ranges[i].addr)),
+ base);
+ int128_addto(&base, now);
+ offset_in_region += int128_get64(now);
+ int128_subfrom(&remain, now);
+ }
+ if (int128_nz(remain)) {
+ fr.offset_in_region = offset_in_region;
+ fr.addr = addrrange_make(base, remain);
+ flatview_insert(view, i, &fr);
+ }
+}
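+
+/*
+ * Note (illustrative): because subregions are rendered before their
+ * container and earlier-rendered ranges obscure later ones, a
+ * higher-priority subregion claims its part of the view first, and
+ * lower-priority siblings and the container itself only fill the
+ * remaining gaps.
+ */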
+
+void flatview_for_each_range(FlatView *fv, flatview_cb cb, void *opaque)
+{
+ FlatRange *fr;
+
+ assert(fv);
+ assert(cb);
+
+ FOR_EACH_FLAT_RANGE(fr, fv) {
+ if (cb(fr->addr.start, fr->addr.size, fr->mr,
+ fr->offset_in_region, opaque)) {
+ break;
+ }
+ }
+}
+
+static MemoryRegion *memory_region_get_flatview_root(MemoryRegion *mr)
+{
+ while (mr->enabled) {
+ if (mr->alias) {
+ if (!mr->alias_offset && int128_ge(mr->size, mr->alias->size)) {
+ /* The alias is included in its entirety. Use it as
+ * the "real" root, so that we can share more FlatViews.
+ */
+ mr = mr->alias;
+ continue;
+ }
+ } else if (!mr->terminates) {
+ unsigned int found = 0;
+ MemoryRegion *child, *next = NULL;
+ QTAILQ_FOREACH(child, &mr->subregions, subregions_link) {
+ if (child->enabled) {
+ if (++found > 1) {
+ next = NULL;
+ break;
+ }
+ if (!child->addr && int128_ge(mr->size, child->size)) {
+ /* A child is included in its entirety. If it's the only
+                         * enabled one, use it in the hope of finding an alias further
+                         * down. This will also let us share FlatViews.
+ */
+ next = child;
+ }
+ }
+ }
+ if (found == 0) {
+ return NULL;
+ }
+ if (next) {
+ mr = next;
+ continue;
+ }
+ }
+
+ return mr;
+ }
+
+ return NULL;
+}
+
+/* Render a memory topology into a list of disjoint absolute ranges. */
+static FlatView *generate_memory_topology(MemoryRegion *mr)
+{
+ int i;
+ FlatView *view;
+
+ view = flatview_new(mr);
+
+ if (mr) {
+ render_memory_region(view, mr, int128_zero(),
+ addrrange_make(int128_zero(), int128_2_64()),
+ false, false);
+ }
+ flatview_simplify(view);
+
+ view->dispatch = address_space_dispatch_new(view);
+ for (i = 0; i < view->nr; i++) {
+ MemoryRegionSection mrs =
+ section_from_flat_range(&view->ranges[i], view);
+ flatview_add_to_dispatch(view, &mrs);
+ }
+ address_space_dispatch_compact(view->dispatch);
+ g_hash_table_replace(flat_views, mr, view);
+
+ return view;
+}
+
+static void address_space_add_del_ioeventfds(AddressSpace *as,
+ MemoryRegionIoeventfd *fds_new,
+ unsigned fds_new_nb,
+ MemoryRegionIoeventfd *fds_old,
+ unsigned fds_old_nb)
+{
+ unsigned iold, inew;
+ MemoryRegionIoeventfd *fd;
+ MemoryRegionSection section;
+
+ /* Generate a symmetric difference of the old and new fd sets, adding
+ * and deleting as necessary.
+ */
+
+ iold = inew = 0;
+ while (iold < fds_old_nb || inew < fds_new_nb) {
+ if (iold < fds_old_nb
+ && (inew == fds_new_nb
+ || memory_region_ioeventfd_before(&fds_old[iold],
+ &fds_new[inew]))) {
+ fd = &fds_old[iold];
+ section = (MemoryRegionSection) {
+ .fv = address_space_to_flatview(as),
+ .offset_within_address_space = int128_get64(fd->addr.start),
+ .size = fd->addr.size,
+ };
+ MEMORY_LISTENER_CALL(as, eventfd_del, Forward, &section,
+ fd->match_data, fd->data, fd->e);
+ ++iold;
+ } else if (inew < fds_new_nb
+ && (iold == fds_old_nb
+ || memory_region_ioeventfd_before(&fds_new[inew],
+ &fds_old[iold]))) {
+ fd = &fds_new[inew];
+ section = (MemoryRegionSection) {
+ .fv = address_space_to_flatview(as),
+ .offset_within_address_space = int128_get64(fd->addr.start),
+ .size = fd->addr.size,
+ };
+ MEMORY_LISTENER_CALL(as, eventfd_add, Reverse, &section,
+ fd->match_data, fd->data, fd->e);
+ ++inew;
+ } else {
+ ++iold;
+ ++inew;
+ }
+ }
+}
+
+FlatView *address_space_get_flatview(AddressSpace *as)
+{
+ FlatView *view;
+
+ RCU_READ_LOCK_GUARD();
+ do {
+ view = address_space_to_flatview(as);
+ /* If somebody has replaced as->current_map concurrently,
+ * flatview_ref returns false.
+ */
+ } while (!flatview_ref(view));
+ return view;
+}
+
+static void address_space_update_ioeventfds(AddressSpace *as)
+{
+ FlatView *view;
+ FlatRange *fr;
+ unsigned ioeventfd_nb = 0;
+ unsigned ioeventfd_max;
+ MemoryRegionIoeventfd *ioeventfds;
+ AddrRange tmp;
+ unsigned i;
+
+ if (!as->ioeventfd_notifiers) {
+ return;
+ }
+
+ /*
+ * It is likely that the number of ioeventfds hasn't changed much, so use
+ * the previous size as the starting value, with some headroom to avoid
+ * gratuitous reallocations.
+ */
+ ioeventfd_max = QEMU_ALIGN_UP(as->ioeventfd_nb, 4);
+ ioeventfds = g_new(MemoryRegionIoeventfd, ioeventfd_max);
+
+ view = address_space_get_flatview(as);
+ FOR_EACH_FLAT_RANGE(fr, view) {
+ for (i = 0; i < fr->mr->ioeventfd_nb; ++i) {
+ tmp = addrrange_shift(fr->mr->ioeventfds[i].addr,
+ int128_sub(fr->addr.start,
+ int128_make64(fr->offset_in_region)));
+ if (addrrange_intersects(fr->addr, tmp)) {
+ ++ioeventfd_nb;
+ if (ioeventfd_nb > ioeventfd_max) {
+ ioeventfd_max = MAX(ioeventfd_max * 2, 4);
+ ioeventfds = g_realloc(ioeventfds,
+ ioeventfd_max * sizeof(*ioeventfds));
+ }
+ ioeventfds[ioeventfd_nb-1] = fr->mr->ioeventfds[i];
+ ioeventfds[ioeventfd_nb-1].addr = tmp;
+ }
+ }
+ }
+
+ address_space_add_del_ioeventfds(as, ioeventfds, ioeventfd_nb,
+ as->ioeventfds, as->ioeventfd_nb);
+
+ g_free(as->ioeventfds);
+ as->ioeventfds = ioeventfds;
+ as->ioeventfd_nb = ioeventfd_nb;
+ flatview_unref(view);
+}
+
+/*
+ * Notify the memory listeners about the coalesced IO change events of
+ * range `cmr'. Only the part that has intersection of the specified
+ * FlatRange will be sent.
+ */
+static void flat_range_coalesced_io_notify(FlatRange *fr, AddressSpace *as,
+ CoalescedMemoryRange *cmr, bool add)
+{
+ AddrRange tmp;
+
+ tmp = addrrange_shift(cmr->addr,
+ int128_sub(fr->addr.start,
+ int128_make64(fr->offset_in_region)));
+ if (!addrrange_intersects(tmp, fr->addr)) {
+ return;
+ }
+ tmp = addrrange_intersection(tmp, fr->addr);
+
+ if (add) {
+ MEMORY_LISTENER_UPDATE_REGION(fr, as, Forward, coalesced_io_add,
+ int128_get64(tmp.start),
+ int128_get64(tmp.size));
+ } else {
+ MEMORY_LISTENER_UPDATE_REGION(fr, as, Reverse, coalesced_io_del,
+ int128_get64(tmp.start),
+ int128_get64(tmp.size));
+ }
+}
+
+static void flat_range_coalesced_io_del(FlatRange *fr, AddressSpace *as)
+{
+ CoalescedMemoryRange *cmr;
+
+ QTAILQ_FOREACH(cmr, &fr->mr->coalesced, link) {
+ flat_range_coalesced_io_notify(fr, as, cmr, false);
+ }
+}
+
+static void flat_range_coalesced_io_add(FlatRange *fr, AddressSpace *as)
+{
+ MemoryRegion *mr = fr->mr;
+ CoalescedMemoryRange *cmr;
+
+ if (QTAILQ_EMPTY(&mr->coalesced)) {
+ return;
+ }
+
+ QTAILQ_FOREACH(cmr, &mr->coalesced, link) {
+ flat_range_coalesced_io_notify(fr, as, cmr, true);
+ }
+}
+
+static void address_space_update_topology_pass(AddressSpace *as,
+ const FlatView *old_view,
+ const FlatView *new_view,
+ bool adding)
+{
+ unsigned iold, inew;
+ FlatRange *frold, *frnew;
+
+ /* Generate a symmetric difference of the old and new memory maps.
+ * Kill ranges in the old map, and instantiate ranges in the new map.
+ */
+ iold = inew = 0;
+ while (iold < old_view->nr || inew < new_view->nr) {
+ if (iold < old_view->nr) {
+ frold = &old_view->ranges[iold];
+ } else {
+ frold = NULL;
+ }
+ if (inew < new_view->nr) {
+ frnew = &new_view->ranges[inew];
+ } else {
+ frnew = NULL;
+ }
+
+ if (frold
+ && (!frnew
+ || int128_lt(frold->addr.start, frnew->addr.start)
+ || (int128_eq(frold->addr.start, frnew->addr.start)
+ && !flatrange_equal(frold, frnew)))) {
+ /* In old but not in new, or in both but attributes changed. */
+
+ if (!adding) {
+ flat_range_coalesced_io_del(frold, as);
+ MEMORY_LISTENER_UPDATE_REGION(frold, as, Reverse, region_del);
+ }
+
+ ++iold;
+ } else if (frold && frnew && flatrange_equal(frold, frnew)) {
+ /* In both and unchanged (except logging may have changed) */
+
+ if (adding) {
+ MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, region_nop);
+ if (frnew->dirty_log_mask & ~frold->dirty_log_mask) {
+ MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, log_start,
+ frold->dirty_log_mask,
+ frnew->dirty_log_mask);
+ }
+ if (frold->dirty_log_mask & ~frnew->dirty_log_mask) {
+ MEMORY_LISTENER_UPDATE_REGION(frnew, as, Reverse, log_stop,
+ frold->dirty_log_mask,
+ frnew->dirty_log_mask);
+ }
+ }
+
+ ++iold;
+ ++inew;
+ } else {
+ /* In new */
+
+ if (adding) {
+ MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, region_add);
+ flat_range_coalesced_io_add(frnew, as);
+ }
+
+ ++inew;
+ }
+ }
+}
+
+static void flatviews_init(void)
+{
+ static FlatView *empty_view;
+
+ if (flat_views) {
+ return;
+ }
+
+ flat_views = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL,
+ (GDestroyNotify) flatview_unref);
+ if (!empty_view) {
+ empty_view = generate_memory_topology(NULL);
+ /* We keep it alive forever in the global variable. */
+ flatview_ref(empty_view);
+ } else {
+ g_hash_table_replace(flat_views, NULL, empty_view);
+ flatview_ref(empty_view);
+ }
+}
+
+static void flatviews_reset(void)
+{
+ AddressSpace *as;
+
+ if (flat_views) {
+ g_hash_table_unref(flat_views);
+ flat_views = NULL;
+ }
+ flatviews_init();
+
+ /* Render unique FVs */
+ QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
+ MemoryRegion *physmr = memory_region_get_flatview_root(as->root);
+
+ if (g_hash_table_lookup(flat_views, physmr)) {
+ continue;
+ }
+
+ generate_memory_topology(physmr);
+ }
+}
+
+static void address_space_set_flatview(AddressSpace *as)
+{
+ FlatView *old_view = address_space_to_flatview(as);
+ MemoryRegion *physmr = memory_region_get_flatview_root(as->root);
+ FlatView *new_view = g_hash_table_lookup(flat_views, physmr);
+
+ assert(new_view);
+
+ if (old_view == new_view) {
+ return;
+ }
+
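+    /*
+     * Take a temporary reference so old_view stays usable through the
+     * update passes below; it is dropped right after as->current_map is
+     * switched over.  The second unref at the end of this function then
+     * releases the address space's original reference.
+     */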
+ if (old_view) {
+ flatview_ref(old_view);
+ }
+
+ flatview_ref(new_view);
+
+ if (!QTAILQ_EMPTY(&as->listeners)) {
+ FlatView tmpview = { .nr = 0 }, *old_view2 = old_view;
+
+ if (!old_view2) {
+ old_view2 = &tmpview;
+ }
+ address_space_update_topology_pass(as, old_view2, new_view, false);
+ address_space_update_topology_pass(as, old_view2, new_view, true);
+ }
+
+ /* Writes are protected by the BQL. */
+ qatomic_rcu_set(&as->current_map, new_view);
+ if (old_view) {
+ flatview_unref(old_view);
+ }
+
+ /* Note that all the old MemoryRegions are still alive up to this
+ * point. This relieves most MemoryListeners from the need to
+ * ref/unref the MemoryRegions they get---unless they use them
+ * outside the iothread mutex, in which case precise reference
+ * counting is necessary.
+ */
+ if (old_view) {
+ flatview_unref(old_view);
+ }
+}
+
+static void address_space_update_topology(AddressSpace *as)
+{
+ MemoryRegion *physmr = memory_region_get_flatview_root(as->root);
+
+ flatviews_init();
+ if (!g_hash_table_lookup(flat_views, physmr)) {
+ generate_memory_topology(physmr);
+ }
+ address_space_set_flatview(as);
+}
+
+void memory_region_transaction_begin(void)
+{
+ qemu_flush_coalesced_mmio_buffer();
+ ++memory_region_transaction_depth;
+}
+
+void memory_region_transaction_commit(void)
+{
+ AddressSpace *as;
+
+ assert(memory_region_transaction_depth);
+ assert(qemu_mutex_iothread_locked());
+
+ --memory_region_transaction_depth;
+ if (!memory_region_transaction_depth) {
+ if (memory_region_update_pending) {
+ flatviews_reset();
+
+ MEMORY_LISTENER_CALL_GLOBAL(begin, Forward);
+
+ QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
+ address_space_set_flatview(as);
+ address_space_update_ioeventfds(as);
+ }
+ memory_region_update_pending = false;
+ ioeventfd_update_pending = false;
+ MEMORY_LISTENER_CALL_GLOBAL(commit, Forward);
+ } else if (ioeventfd_update_pending) {
+ QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
+ address_space_update_ioeventfds(as);
+ }
+ ioeventfd_update_pending = false;
+ }
+ }
+}
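+
+/*
+ * Usage sketch (illustrative): batching several changes so flat views
+ * and listeners are updated only once, at the outermost commit:
+ *
+ *     memory_region_transaction_begin();
+ *     memory_region_set_enabled(mr_a, false);
+ *     memory_region_set_enabled(mr_b, true);
+ *     memory_region_transaction_commit();
+ */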
+
+static void memory_region_destructor_none(MemoryRegion *mr)
+{
+}
+
+static void memory_region_destructor_ram(MemoryRegion *mr)
+{
+ qemu_ram_free(mr->ram_block);
+}
+
+static bool memory_region_need_escape(char c)
+{
+ return c == '/' || c == '[' || c == '\\' || c == ']';
+}
+
+static char *memory_region_escape_name(const char *name)
+{
+ const char *p;
+ char *escaped, *q;
+ uint8_t c;
+ size_t bytes = 0;
+
+ for (p = name; *p; p++) {
+ bytes += memory_region_need_escape(*p) ? 4 : 1;
+ }
+ if (bytes == p - name) {
+ return g_memdup(name, bytes + 1);
+ }
+
+ escaped = g_malloc(bytes + 1);
+ for (p = name, q = escaped; *p; p++) {
+ c = *p;
+ if (unlikely(memory_region_need_escape(c))) {
+ *q++ = '\\';
+ *q++ = 'x';
+ *q++ = "0123456789abcdef"[c >> 4];
+ c = "0123456789abcdef"[c & 15];
+ }
+ *q++ = c;
+ }
+ *q = 0;
+ return escaped;
+}
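+
+/*
+ * For example (illustrative): "pci/mem" is escaped to "pci\x2fmem",
+ * since '/' would otherwise be misparsed as a QOM path separator.
+ */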
+
+static void memory_region_do_init(MemoryRegion *mr,
+ Object *owner,
+ const char *name,
+ uint64_t size)
+{
+ mr->size = int128_make64(size);
+ if (size == UINT64_MAX) {
+ mr->size = int128_2_64();
+ }
+ mr->name = g_strdup(name);
+ mr->owner = owner;
+ mr->dev = (DeviceState *) object_dynamic_cast(mr->owner, TYPE_DEVICE);
+ mr->ram_block = NULL;
+
+ if (name) {
+ char *escaped_name = memory_region_escape_name(name);
+ char *name_array = g_strdup_printf("%s[*]", escaped_name);
+
+ if (!owner) {
+ owner = container_get(qdev_get_machine(), "/unattached");
+ }
+
+ object_property_add_child(owner, name_array, OBJECT(mr));
+ object_unref(OBJECT(mr));
+ g_free(name_array);
+ g_free(escaped_name);
+ }
+}
+
+void memory_region_init(MemoryRegion *mr,
+ Object *owner,
+ const char *name,
+ uint64_t size)
+{
+ object_initialize(mr, sizeof(*mr), TYPE_MEMORY_REGION);
+ memory_region_do_init(mr, owner, name, size);
+}
+
+static void memory_region_get_container(Object *obj, Visitor *v,
+ const char *name, void *opaque,
+ Error **errp)
+{
+ MemoryRegion *mr = MEMORY_REGION(obj);
+ char *path = (char *)"";
+
+ if (mr->container) {
+ path = object_get_canonical_path(OBJECT(mr->container));
+ }
+ visit_type_str(v, name, &path, errp);
+ if (mr->container) {
+ g_free(path);
+ }
+}
+
+static Object *memory_region_resolve_container(Object *obj, void *opaque,
+ const char *part)
+{
+ MemoryRegion *mr = MEMORY_REGION(obj);
+
+ return OBJECT(mr->container);
+}
+
+static void memory_region_get_priority(Object *obj, Visitor *v,
+ const char *name, void *opaque,
+ Error **errp)
+{
+ MemoryRegion *mr = MEMORY_REGION(obj);
+ int32_t value = mr->priority;
+
+ visit_type_int32(v, name, &value, errp);
+}
+
+static void memory_region_get_size(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ MemoryRegion *mr = MEMORY_REGION(obj);
+ uint64_t value = memory_region_size(mr);
+
+ visit_type_uint64(v, name, &value, errp);
+}
+
+static void memory_region_initfn(Object *obj)
+{
+ MemoryRegion *mr = MEMORY_REGION(obj);
+ ObjectProperty *op;
+
+ mr->ops = &unassigned_mem_ops;
+ mr->enabled = true;
+ mr->romd_mode = true;
+ mr->destructor = memory_region_destructor_none;
+ QTAILQ_INIT(&mr->subregions);
+ QTAILQ_INIT(&mr->coalesced);
+
+ op = object_property_add(OBJECT(mr), "container",
+ "link<" TYPE_MEMORY_REGION ">",
+ memory_region_get_container,
+ NULL, /* memory_region_set_container */
+ NULL, NULL);
+ op->resolve = memory_region_resolve_container;
+
+ object_property_add_uint64_ptr(OBJECT(mr), "addr",
+ &mr->addr, OBJ_PROP_FLAG_READ);
+ object_property_add(OBJECT(mr), "priority", "uint32",
+ memory_region_get_priority,
+ NULL, /* memory_region_set_priority */
+ NULL, NULL);
+ object_property_add(OBJECT(mr), "size", "uint64",
+ memory_region_get_size,
+ NULL, /* memory_region_set_size, */
+ NULL, NULL);
+}
+
+static void iommu_memory_region_initfn(Object *obj)
+{
+ MemoryRegion *mr = MEMORY_REGION(obj);
+
+ mr->is_iommu = true;
+}
+
+static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
+ unsigned size)
+{
+#ifdef DEBUG_UNASSIGNED
+ printf("Unassigned mem read " HWADDR_FMT_plx "\n", addr);
+#endif
+ return 0;
+}
+
+static void unassigned_mem_write(void *opaque, hwaddr addr,
+ uint64_t val, unsigned size)
+{
+#ifdef DEBUG_UNASSIGNED
+ printf("Unassigned mem write " HWADDR_FMT_plx " = 0x%"PRIx64"\n", addr, val);
+#endif
+}
+
+static bool unassigned_mem_accepts(void *opaque, hwaddr addr,
+ unsigned size, bool is_write,
+ MemTxAttrs attrs)
+{
+ return false;
+}
+
+const MemoryRegionOps unassigned_mem_ops = {
+ .valid.accepts = unassigned_mem_accepts,
+ .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+static uint64_t memory_region_ram_device_read(void *opaque,
+ hwaddr addr, unsigned size)
+{
+ MemoryRegion *mr = opaque;
+ uint64_t data = (uint64_t)~0;
+
+ switch (size) {
+ case 1:
+ data = *(uint8_t *)(mr->ram_block->host + addr);
+ break;
+ case 2:
+ data = *(uint16_t *)(mr->ram_block->host + addr);
+ break;
+ case 4:
+ data = *(uint32_t *)(mr->ram_block->host + addr);
+ break;
+ case 8:
+ data = *(uint64_t *)(mr->ram_block->host + addr);
+ break;
+ }
+
+ trace_memory_region_ram_device_read(get_cpu_index(), mr, addr, data, size);
+
+ return data;
+}
+
+static void memory_region_ram_device_write(void *opaque, hwaddr addr,
+ uint64_t data, unsigned size)
+{
+ MemoryRegion *mr = opaque;
+
+ trace_memory_region_ram_device_write(get_cpu_index(), mr, addr, data, size);
+
+ switch (size) {
+ case 1:
+ *(uint8_t *)(mr->ram_block->host + addr) = (uint8_t)data;
+ break;
+ case 2:
+ *(uint16_t *)(mr->ram_block->host + addr) = (uint16_t)data;
+ break;
+ case 4:
+ *(uint32_t *)(mr->ram_block->host + addr) = (uint32_t)data;
+ break;
+ case 8:
+ *(uint64_t *)(mr->ram_block->host + addr) = data;
+ break;
+ }
+}
+
+static const MemoryRegionOps ram_device_mem_ops = {
+ .read = memory_region_ram_device_read,
+ .write = memory_region_ram_device_write,
+ .endianness = DEVICE_HOST_ENDIAN,
+ .valid = {
+ .min_access_size = 1,
+ .max_access_size = 8,
+ .unaligned = true,
+ },
+ .impl = {
+ .min_access_size = 1,
+ .max_access_size = 8,
+ .unaligned = true,
+ },
+};
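+
+/*
+ * ram_device regions are backed by host memory, but are accessed via
+ * the explicit, size-respecting accessors above instead of plain
+ * memcpy, because the backing may be device memory with access-size
+ * constraints.
+ */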
+
+bool memory_region_access_valid(MemoryRegion *mr,
+ hwaddr addr,
+ unsigned size,
+ bool is_write,
+ MemTxAttrs attrs)
+{
+ if (mr->ops->valid.accepts
+ && !mr->ops->valid.accepts(mr->opaque, addr, size, is_write, attrs)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "Invalid %s at addr 0x%" HWADDR_PRIX
+ ", size %u, region '%s', reason: rejected\n",
+ is_write ? "write" : "read",
+ addr, size, memory_region_name(mr));
+ return false;
+ }
+
+ if (!mr->ops->valid.unaligned && (addr & (size - 1))) {
+ qemu_log_mask(LOG_GUEST_ERROR, "Invalid %s at addr 0x%" HWADDR_PRIX
+ ", size %u, region '%s', reason: unaligned\n",
+ is_write ? "write" : "read",
+ addr, size, memory_region_name(mr));
+ return false;
+ }
+
+    /* Treat a max_access_size of zero as "all sizes valid", for compatibility */
+ if (!mr->ops->valid.max_access_size) {
+ return true;
+ }
+
+ if (size > mr->ops->valid.max_access_size
+ || size < mr->ops->valid.min_access_size) {
+ qemu_log_mask(LOG_GUEST_ERROR, "Invalid %s at addr 0x%" HWADDR_PRIX
+ ", size %u, region '%s', reason: invalid size "
+ "(min:%u max:%u)\n",
+ is_write ? "write" : "read",
+ addr, size, memory_region_name(mr),
+ mr->ops->valid.min_access_size,
+ mr->ops->valid.max_access_size);
+ return false;
+ }
+ return true;
+}
+
+static MemTxResult memory_region_dispatch_read1(MemoryRegion *mr,
+ hwaddr addr,
+ uint64_t *pval,
+ unsigned size,
+ MemTxAttrs attrs)
+{
+ *pval = 0;
+
+ if (mr->ops->read) {
+ return access_with_adjusted_size(addr, pval, size,
+ mr->ops->impl.min_access_size,
+ mr->ops->impl.max_access_size,
+ memory_region_read_accessor,
+ mr, attrs);
+ } else {
+ return access_with_adjusted_size(addr, pval, size,
+ mr->ops->impl.min_access_size,
+ mr->ops->impl.max_access_size,
+ memory_region_read_with_attrs_accessor,
+ mr, attrs);
+ }
+}
+
+MemTxResult memory_region_dispatch_read(MemoryRegion *mr,
+ hwaddr addr,
+ uint64_t *pval,
+ MemOp op,
+ MemTxAttrs attrs)
+{
+ unsigned size = memop_size(op);
+ MemTxResult r;
+
+ if (mr->alias) {
+ return memory_region_dispatch_read(mr->alias,
+ mr->alias_offset + addr,
+ pval, op, attrs);
+ }
+ if (!memory_region_access_valid(mr, addr, size, false, attrs)) {
+ *pval = unassigned_mem_read(mr, addr, size);
+ return MEMTX_DECODE_ERROR;
+ }
+
+ r = memory_region_dispatch_read1(mr, addr, pval, size, attrs);
+ adjust_endianness(mr, pval, op);
+ return r;
+}
+
+/* Return true if an eventfd was signalled */
+static bool memory_region_dispatch_write_eventfds(MemoryRegion *mr,
+ hwaddr addr,
+ uint64_t data,
+ unsigned size,
+ MemTxAttrs attrs)
+{
+ MemoryRegionIoeventfd ioeventfd = {
+ .addr = addrrange_make(int128_make64(addr), int128_make64(size)),
+ .data = data,
+ };
+ unsigned i;
+
+ for (i = 0; i < mr->ioeventfd_nb; i++) {
+ ioeventfd.match_data = mr->ioeventfds[i].match_data;
+ ioeventfd.e = mr->ioeventfds[i].e;
+
+ if (memory_region_ioeventfd_equal(&ioeventfd, &mr->ioeventfds[i])) {
+ event_notifier_set(ioeventfd.e);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+MemTxResult memory_region_dispatch_write(MemoryRegion *mr,
+ hwaddr addr,
+ uint64_t data,
+ MemOp op,
+ MemTxAttrs attrs)
+{
+ unsigned size = memop_size(op);
+
+ if (mr->alias) {
+ return memory_region_dispatch_write(mr->alias,
+ mr->alias_offset + addr,
+ data, op, attrs);
+ }
+ if (!memory_region_access_valid(mr, addr, size, true, attrs)) {
+ unassigned_mem_write(mr, addr, data, size);
+ return MEMTX_DECODE_ERROR;
+ }
+
+ adjust_endianness(mr, &data, op);
+
+ if ((!kvm_eventfds_enabled()) &&
+ memory_region_dispatch_write_eventfds(mr, addr, data, size, attrs)) {
+ return MEMTX_OK;
+ }
+
+ if (mr->ops->write) {
+ return access_with_adjusted_size(addr, &data, size,
+ mr->ops->impl.min_access_size,
+ mr->ops->impl.max_access_size,
+ memory_region_write_accessor, mr,
+ attrs);
+ } else {
+ return
+ access_with_adjusted_size(addr, &data, size,
+ mr->ops->impl.min_access_size,
+ mr->ops->impl.max_access_size,
+ memory_region_write_with_attrs_accessor,
+ mr, attrs);
+ }
+}
+
+void memory_region_init_io(MemoryRegion *mr,
+ Object *owner,
+ const MemoryRegionOps *ops,
+ void *opaque,
+ const char *name,
+ uint64_t size)
+{
+ memory_region_init(mr, owner, name, size);
+ mr->ops = ops ? ops : &unassigned_mem_ops;
+ mr->opaque = opaque;
+ mr->terminates = true;
+}
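+
+/*
+ * Usage sketch (illustrative; the device names are hypothetical):
+ *
+ *     static const MemoryRegionOps mydev_ops = {
+ *         .read = mydev_read,
+ *         .write = mydev_write,
+ *         .endianness = DEVICE_LITTLE_ENDIAN,
+ *     };
+ *     memory_region_init_io(&s->iomem, OBJECT(s), &mydev_ops, s,
+ *                           "mydev-mmio", 0x1000);
+ */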
+
+void memory_region_init_ram_nomigrate(MemoryRegion *mr,
+ Object *owner,
+ const char *name,
+ uint64_t size,
+ Error **errp)
+{
+ memory_region_init_ram_flags_nomigrate(mr, owner, name, size, 0, errp);
+}
+
+void memory_region_init_ram_flags_nomigrate(MemoryRegion *mr,
+ Object *owner,
+ const char *name,
+ uint64_t size,
+ uint32_t ram_flags,
+ Error **errp)
+{
+ Error *err = NULL;
+ memory_region_init(mr, owner, name, size);
+ mr->ram = true;
+ mr->terminates = true;
+ mr->destructor = memory_region_destructor_ram;
+ mr->ram_block = qemu_ram_alloc(size, ram_flags, mr, &err);
+ if (err) {
+ mr->size = int128_zero();
+ object_unparent(OBJECT(mr));
+ error_propagate(errp, err);
+ }
+}
+
+void memory_region_init_resizeable_ram(MemoryRegion *mr,
+ Object *owner,
+ const char *name,
+ uint64_t size,
+ uint64_t max_size,
+ void (*resized)(const char*,
+ uint64_t length,
+ void *host),
+ Error **errp)
+{
+ Error *err = NULL;
+ memory_region_init(mr, owner, name, size);
+ mr->ram = true;
+ mr->terminates = true;
+ mr->destructor = memory_region_destructor_ram;
+ mr->ram_block = qemu_ram_alloc_resizeable(size, max_size, resized,
+ mr, &err);
+ if (err) {
+ mr->size = int128_zero();
+ object_unparent(OBJECT(mr));
+ error_propagate(errp, err);
+ }
+}
+
+#ifdef CONFIG_POSIX
+void memory_region_init_ram_from_file(MemoryRegion *mr,
+ Object *owner,
+ const char *name,
+ uint64_t size,
+ uint64_t align,
+ uint32_t ram_flags,
+ const char *path,
+ ram_addr_t offset,
+ Error **errp)
+{
+ Error *err = NULL;
+ memory_region_init(mr, owner, name, size);
+ mr->ram = true;
+ mr->readonly = !!(ram_flags & RAM_READONLY);
+ mr->terminates = true;
+ mr->destructor = memory_region_destructor_ram;
+ mr->align = align;
+ mr->ram_block = qemu_ram_alloc_from_file(size, mr, ram_flags, path,
+ offset, &err);
+ if (err) {
+ mr->size = int128_zero();
+ object_unparent(OBJECT(mr));
+ error_propagate(errp, err);
+ }
+}
+
+void memory_region_init_ram_from_fd(MemoryRegion *mr,
+ Object *owner,
+ const char *name,
+ uint64_t size,
+ uint32_t ram_flags,
+ int fd,
+ ram_addr_t offset,
+ Error **errp)
+{
+ Error *err = NULL;
+ memory_region_init(mr, owner, name, size);
+ mr->ram = true;
+ mr->readonly = !!(ram_flags & RAM_READONLY);
+ mr->terminates = true;
+ mr->destructor = memory_region_destructor_ram;
+ mr->ram_block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, offset,
+ &err);
+ if (err) {
+ mr->size = int128_zero();
+ object_unparent(OBJECT(mr));
+ error_propagate(errp, err);
+ }
+}
+#endif
+
+void memory_region_init_ram_ptr(MemoryRegion *mr,
+ Object *owner,
+ const char *name,
+ uint64_t size,
+ void *ptr)
+{
+ memory_region_init(mr, owner, name, size);
+ mr->ram = true;
+ mr->terminates = true;
+ mr->destructor = memory_region_destructor_ram;
+
+ /* qemu_ram_alloc_from_ptr cannot fail with ptr != NULL. */
+ assert(ptr != NULL);
+ mr->ram_block = qemu_ram_alloc_from_ptr(size, ptr, mr, &error_fatal);
+}
+
+void memory_region_init_ram_device_ptr(MemoryRegion *mr,
+ Object *owner,
+ const char *name,
+ uint64_t size,
+ void *ptr)
+{
+ memory_region_init(mr, owner, name, size);
+ mr->ram = true;
+ mr->terminates = true;
+ mr->ram_device = true;
+ mr->ops = &ram_device_mem_ops;
+ mr->opaque = mr;
+ mr->destructor = memory_region_destructor_ram;
+
+ /* qemu_ram_alloc_from_ptr cannot fail with ptr != NULL. */
+ assert(ptr != NULL);
+ mr->ram_block = qemu_ram_alloc_from_ptr(size, ptr, mr, &error_fatal);
+}
+
+void memory_region_init_alias(MemoryRegion *mr,
+ Object *owner,
+ const char *name,
+ MemoryRegion *orig,
+ hwaddr offset,
+ uint64_t size)
+{
+ memory_region_init(mr, owner, name, size);
+ mr->alias = orig;
+ mr->alias_offset = offset;
+}
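+
+/*
+ * For example (illustrative): exposing a 1 MiB window into system RAM
+ * starting at offset 0x100000 within @orig:
+ *
+ *     memory_region_init_alias(alias, owner, "ram-window", ram_mr,
+ *                              0x100000, 1 * MiB);
+ */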
+
+void memory_region_init_rom_nomigrate(MemoryRegion *mr,
+ Object *owner,
+ const char *name,
+ uint64_t size,
+ Error **errp)
+{
+ memory_region_init_ram_flags_nomigrate(mr, owner, name, size, 0, errp);
+ mr->readonly = true;
+}
+
+void memory_region_init_rom_device_nomigrate(MemoryRegion *mr,
+ Object *owner,
+ const MemoryRegionOps *ops,
+ void *opaque,
+ const char *name,
+ uint64_t size,
+ Error **errp)
+{
+ Error *err = NULL;
+ assert(ops);
+ memory_region_init(mr, owner, name, size);
+ mr->ops = ops;
+ mr->opaque = opaque;
+ mr->terminates = true;
+ mr->rom_device = true;
+ mr->destructor = memory_region_destructor_ram;
+ mr->ram_block = qemu_ram_alloc(size, 0, mr, &err);
+ if (err) {
+ mr->size = int128_zero();
+ object_unparent(OBJECT(mr));
+ error_propagate(errp, err);
+ }
+}
+
+void memory_region_init_iommu(void *_iommu_mr,
+ size_t instance_size,
+ const char *mrtypename,
+ Object *owner,
+ const char *name,
+ uint64_t size)
+{
+ struct IOMMUMemoryRegion *iommu_mr;
+ struct MemoryRegion *mr;
+
+ object_initialize(_iommu_mr, instance_size, mrtypename);
+ mr = MEMORY_REGION(_iommu_mr);
+ memory_region_do_init(mr, owner, name, size);
+ iommu_mr = IOMMU_MEMORY_REGION(mr);
+ mr->terminates = true; /* then re-forwards */
+ QLIST_INIT(&iommu_mr->iommu_notify);
+ iommu_mr->iommu_notify_flags = IOMMU_NOTIFIER_NONE;
+}
+
+static void memory_region_finalize(Object *obj)
+{
+ MemoryRegion *mr = MEMORY_REGION(obj);
+
+ assert(!mr->container);
+
+    /* We know the region is not visible in any address space (it
+     * does not have a container, and cannot be a root either because
+     * it has no references), so we can blindly clear mr->enabled.
+ * memory_region_set_enabled instead could trigger a transaction
+ * and cause an infinite loop.
+ */
+ mr->enabled = false;
+ memory_region_transaction_begin();
+ while (!QTAILQ_EMPTY(&mr->subregions)) {
+ MemoryRegion *subregion = QTAILQ_FIRST(&mr->subregions);
+ memory_region_del_subregion(mr, subregion);
+ }
+ memory_region_transaction_commit();
+
+ mr->destructor(mr);
+ memory_region_clear_coalescing(mr);
+ g_free((char *)mr->name);
+ g_free(mr->ioeventfds);
+}
+
+Object *memory_region_owner(MemoryRegion *mr)
+{
+ Object *obj = OBJECT(mr);
+ return obj->parent;
+}
+
+void memory_region_ref(MemoryRegion *mr)
+{
+ /* MMIO callbacks most likely will access data that belongs
+ * to the owner, hence the need to ref/unref the owner whenever
+ * the memory region is in use.
+ *
+ * The memory region is a child of its owner. As long as the
+ * owner doesn't call unparent itself on the memory region,
+ * ref-ing the owner will also keep the memory region alive.
+ * Memory regions without an owner are supposed to never go away;
+     * we do not ref/unref them because it would noticeably slow down DMA.
+ */
+ if (mr && mr->owner) {
+ object_ref(mr->owner);
+ }
+}
+
+void memory_region_unref(MemoryRegion *mr)
+{
+ if (mr && mr->owner) {
+ object_unref(mr->owner);
+ }
+}
+
+uint64_t memory_region_size(MemoryRegion *mr)
+{
+ if (int128_eq(mr->size, int128_2_64())) {
+ return UINT64_MAX;
+ }
+ return int128_get64(mr->size);
+}
+
+const char *memory_region_name(const MemoryRegion *mr)
+{
+ if (!mr->name) {
+ ((MemoryRegion *)mr)->name =
+ g_strdup(object_get_canonical_path_component(OBJECT(mr)));
+ }
+ return mr->name;
+}
+
+bool memory_region_is_ram_device(MemoryRegion *mr)
+{
+ return mr->ram_device;
+}
+
+bool memory_region_is_protected(MemoryRegion *mr)
+{
+ return mr->ram && (mr->ram_block->flags & RAM_PROTECTED);
+}
+
+uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr)
+{
+ uint8_t mask = mr->dirty_log_mask;
+ RAMBlock *rb = mr->ram_block;
+
+ if (global_dirty_tracking && ((rb && qemu_ram_is_migratable(rb)) ||
+ memory_region_is_iommu(mr))) {
+ mask |= (1 << DIRTY_MEMORY_MIGRATION);
+ }
+
+ if (tcg_enabled() && rb) {
+ /* TCG only cares about dirty memory logging for RAM, not IOMMU. */
+ mask |= (1 << DIRTY_MEMORY_CODE);
+ }
+ return mask;
+}
+
+bool memory_region_is_logging(MemoryRegion *mr, uint8_t client)
+{
+ return memory_region_get_dirty_log_mask(mr) & (1 << client);
+}
+
+static int memory_region_update_iommu_notify_flags(IOMMUMemoryRegion *iommu_mr,
+ Error **errp)
+{
+ IOMMUNotifierFlag flags = IOMMU_NOTIFIER_NONE;
+ IOMMUNotifier *iommu_notifier;
+ IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr);
+ int ret = 0;
+
+ IOMMU_NOTIFIER_FOREACH(iommu_notifier, iommu_mr) {
+ flags |= iommu_notifier->notifier_flags;
+ }
+
+ if (flags != iommu_mr->iommu_notify_flags && imrc->notify_flag_changed) {
+ ret = imrc->notify_flag_changed(iommu_mr,
+ iommu_mr->iommu_notify_flags,
+ flags, errp);
+ }
+
+ if (!ret) {
+ iommu_mr->iommu_notify_flags = flags;
+ }
+ return ret;
+}
+
+int memory_region_iommu_set_page_size_mask(IOMMUMemoryRegion *iommu_mr,
+ uint64_t page_size_mask,
+ Error **errp)
+{
+ IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr);
+ int ret = 0;
+
+ if (imrc->iommu_set_page_size_mask) {
+ ret = imrc->iommu_set_page_size_mask(iommu_mr, page_size_mask, errp);
+ }
+ return ret;
+}
+
+int memory_region_register_iommu_notifier(MemoryRegion *mr,
+ IOMMUNotifier *n, Error **errp)
+{
+ IOMMUMemoryRegion *iommu_mr;
+ int ret;
+
+ if (mr->alias) {
+ return memory_region_register_iommu_notifier(mr->alias, n, errp);
+ }
+
+ /* We need to register for at least one bitfield */
+ iommu_mr = IOMMU_MEMORY_REGION(mr);
+ assert(n->notifier_flags != IOMMU_NOTIFIER_NONE);
+ assert(n->start <= n->end);
+ assert(n->iommu_idx >= 0 &&
+ n->iommu_idx < memory_region_iommu_num_indexes(iommu_mr));
+
+ QLIST_INSERT_HEAD(&iommu_mr->iommu_notify, n, node);
+ ret = memory_region_update_iommu_notify_flags(iommu_mr, errp);
+ if (ret) {
+ QLIST_REMOVE(n, node);
+ }
+ return ret;
+}
+
+uint64_t memory_region_iommu_get_min_page_size(IOMMUMemoryRegion *iommu_mr)
+{
+ IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr);
+
+ if (imrc->get_min_page_size) {
+ return imrc->get_min_page_size(iommu_mr);
+ }
+ return TARGET_PAGE_SIZE;
+}
+
+void memory_region_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n)
+{
+ MemoryRegion *mr = MEMORY_REGION(iommu_mr);
+ IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr);
+ hwaddr addr, granularity;
+ IOMMUTLBEntry iotlb;
+
+ /* If the IOMMU has its own replay callback, override */
+ if (imrc->replay) {
+ imrc->replay(iommu_mr, n);
+ return;
+ }
+
+ granularity = memory_region_iommu_get_min_page_size(iommu_mr);
+
+ for (addr = 0; addr < memory_region_size(mr); addr += granularity) {
+ iotlb = imrc->translate(iommu_mr, addr, IOMMU_NONE, n->iommu_idx);
+ if (iotlb.perm != IOMMU_NONE) {
+ n->notify(n, &iotlb);
+ }
+
+        /* If (2^64 - MR size) < granularity, it's possible to get an
+         * infinite loop here. This check catches such a wraparound. */
+ if ((addr + granularity) < addr) {
+ break;
+ }
+ }
+}
+
+void memory_region_unregister_iommu_notifier(MemoryRegion *mr,
+ IOMMUNotifier *n)
+{
+ IOMMUMemoryRegion *iommu_mr;
+
+ if (mr->alias) {
+ memory_region_unregister_iommu_notifier(mr->alias, n);
+ return;
+ }
+ QLIST_REMOVE(n, node);
+ iommu_mr = IOMMU_MEMORY_REGION(mr);
+ memory_region_update_iommu_notify_flags(iommu_mr, NULL);
+}
+
+void memory_region_notify_iommu_one(IOMMUNotifier *notifier,
+ IOMMUTLBEvent *event)
+{
+ IOMMUTLBEntry *entry = &event->entry;
+ hwaddr entry_end = entry->iova + entry->addr_mask;
+ IOMMUTLBEntry tmp = *entry;
+
+ if (event->type == IOMMU_NOTIFIER_UNMAP) {
+ assert(entry->perm == IOMMU_NONE);
+ }
+
+ /*
+     * Skip the notification if it does not overlap with the
+     * registered range.
+ */
+ if (notifier->start > entry_end || notifier->end < entry->iova) {
+ return;
+ }
+
+ if (notifier->notifier_flags & IOMMU_NOTIFIER_DEVIOTLB_UNMAP) {
+ /* Crop (iova, addr_mask) to range */
+ tmp.iova = MAX(tmp.iova, notifier->start);
+ tmp.addr_mask = MIN(entry_end, notifier->end) - tmp.iova;
+ } else {
+ assert(entry->iova >= notifier->start && entry_end <= notifier->end);
+ }
+
+ if (event->type & notifier->notifier_flags) {
+ notifier->notify(notifier, &tmp);
+ }
+}
+
+void memory_region_unmap_iommu_notifier_range(IOMMUNotifier *notifier)
+{
+ IOMMUTLBEvent event;
+
+ event.type = IOMMU_NOTIFIER_UNMAP;
+ event.entry.target_as = &address_space_memory;
+ event.entry.iova = notifier->start;
+ event.entry.perm = IOMMU_NONE;
+ event.entry.addr_mask = notifier->end - notifier->start;
+
+ memory_region_notify_iommu_one(notifier, &event);
+}
+
+void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr,
+ int iommu_idx,
+ IOMMUTLBEvent event)
+{
+ IOMMUNotifier *iommu_notifier;
+
+ assert(memory_region_is_iommu(MEMORY_REGION(iommu_mr)));
+
+ IOMMU_NOTIFIER_FOREACH(iommu_notifier, iommu_mr) {
+ if (iommu_notifier->iommu_idx == iommu_idx) {
+ memory_region_notify_iommu_one(iommu_notifier, &event);
+ }
+ }
+}
+
+int memory_region_iommu_get_attr(IOMMUMemoryRegion *iommu_mr,
+ enum IOMMUMemoryRegionAttr attr,
+ void *data)
+{
+ IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr);
+
+ if (!imrc->get_attr) {
+ return -EINVAL;
+ }
+
+ return imrc->get_attr(iommu_mr, attr, data);
+}
+
+int memory_region_iommu_attrs_to_index(IOMMUMemoryRegion *iommu_mr,
+ MemTxAttrs attrs)
+{
+ IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr);
+
+ if (!imrc->attrs_to_index) {
+ return 0;
+ }
+
+ return imrc->attrs_to_index(iommu_mr, attrs);
+}
+
+int memory_region_iommu_num_indexes(IOMMUMemoryRegion *iommu_mr)
+{
+ IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr);
+
+ if (!imrc->num_indexes) {
+ return 1;
+ }
+
+ return imrc->num_indexes(iommu_mr);
+}
+
+RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr)
+{
+ if (!memory_region_is_mapped(mr) || !memory_region_is_ram(mr)) {
+ return NULL;
+ }
+ return mr->rdm;
+}
+
+void memory_region_set_ram_discard_manager(MemoryRegion *mr,
+ RamDiscardManager *rdm)
+{
+ g_assert(memory_region_is_ram(mr) && !memory_region_is_mapped(mr));
+ g_assert(!rdm || !mr->rdm);
+ mr->rdm = rdm;
+}
+
+uint64_t ram_discard_manager_get_min_granularity(const RamDiscardManager *rdm,
+ const MemoryRegion *mr)
+{
+ RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
+
+ g_assert(rdmc->get_min_granularity);
+ return rdmc->get_min_granularity(rdm, mr);
+}
+
+bool ram_discard_manager_is_populated(const RamDiscardManager *rdm,
+ const MemoryRegionSection *section)
+{
+ RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
+
+ g_assert(rdmc->is_populated);
+ return rdmc->is_populated(rdm, section);
+}
+
+int ram_discard_manager_replay_populated(const RamDiscardManager *rdm,
+ MemoryRegionSection *section,
+ ReplayRamPopulate replay_fn,
+ void *opaque)
+{
+ RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
+
+ g_assert(rdmc->replay_populated);
+ return rdmc->replay_populated(rdm, section, replay_fn, opaque);
+}
+
+void ram_discard_manager_replay_discarded(const RamDiscardManager *rdm,
+ MemoryRegionSection *section,
+ ReplayRamDiscard replay_fn,
+ void *opaque)
+{
+ RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
+
+ g_assert(rdmc->replay_discarded);
+ rdmc->replay_discarded(rdm, section, replay_fn, opaque);
+}
+
+void ram_discard_manager_register_listener(RamDiscardManager *rdm,
+ RamDiscardListener *rdl,
+ MemoryRegionSection *section)
+{
+ RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
+
+ g_assert(rdmc->register_listener);
+ rdmc->register_listener(rdm, rdl, section);
+}
+
+void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
+ RamDiscardListener *rdl)
+{
+ RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
+
+ g_assert(rdmc->unregister_listener);
+ rdmc->unregister_listener(rdm, rdl);
+}
+
+/* Called with rcu_read_lock held. */
+bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
+ ram_addr_t *ram_addr, bool *read_only,
+ bool *mr_has_discard_manager)
+{
+ MemoryRegion *mr;
+ hwaddr xlat;
+ hwaddr len = iotlb->addr_mask + 1;
+ bool writable = iotlb->perm & IOMMU_WO;
+
+ if (mr_has_discard_manager) {
+ *mr_has_discard_manager = false;
+ }
+ /*
+ * The IOMMU TLB entry we have just covers translation through
+ * this IOMMU to its immediate target. We need to translate
+ * it the rest of the way through to memory.
+ */
+ mr = address_space_translate(&address_space_memory, iotlb->translated_addr,
+ &xlat, &len, writable, MEMTXATTRS_UNSPECIFIED);
+ if (!memory_region_is_ram(mr)) {
+ error_report("iommu map to non memory area %" HWADDR_PRIx "", xlat);
+ return false;
+ } else if (memory_region_has_ram_discard_manager(mr)) {
+ RamDiscardManager *rdm = memory_region_get_ram_discard_manager(mr);
+ MemoryRegionSection tmp = {
+ .mr = mr,
+ .offset_within_region = xlat,
+ .size = int128_make64(len),
+ };
+ if (mr_has_discard_manager) {
+ *mr_has_discard_manager = true;
+ }
+ /*
+ * Malicious VMs can map memory into the IOMMU, which is expected
+ * to remain discarded. vfio will pin all pages, populating memory.
+     * Disallow that. vmstate priorities make sure any RamDiscardManager
+     * state was already restored before IOMMUs are restored.
+ */
+ if (!ram_discard_manager_is_populated(rdm, &tmp)) {
+ error_report("iommu map to discarded memory (e.g., unplugged via"
+ " virtio-mem): %" HWADDR_PRIx "",
+ iotlb->translated_addr);
+ return false;
+ }
+ }
+
+ /*
+ * Translation truncates length to the IOMMU page size,
+ * check that it did not truncate too much.
+ */
+ if (len & iotlb->addr_mask) {
+ error_report("iommu has granularity incompatible with target AS");
+ return false;
+ }
+
+ if (vaddr) {
+ *vaddr = memory_region_get_ram_ptr(mr) + xlat;
+ }
+
+ if (ram_addr) {
+ *ram_addr = memory_region_get_ram_addr(mr) + xlat;
+ }
+
+ if (read_only) {
+ *read_only = !writable || mr->readonly;
+ }
+
+ return true;
+}
+
+void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client)
+{
+ uint8_t mask = 1 << client;
+ uint8_t old_logging;
+
+ assert(client == DIRTY_MEMORY_VGA);
+ old_logging = mr->vga_logging_count;
+ mr->vga_logging_count += log ? 1 : -1;
+ if (!!old_logging == !!mr->vga_logging_count) {
+ return;
+ }
+
+ memory_region_transaction_begin();
+ mr->dirty_log_mask = (mr->dirty_log_mask & ~mask) | (log * mask);
+ memory_region_update_pending |= mr->enabled;
+ memory_region_transaction_commit();
+}
+
+void memory_region_set_dirty(MemoryRegion *mr, hwaddr addr,
+ hwaddr size)
+{
+ assert(mr->ram_block);
+ cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
+ size,
+ memory_region_get_dirty_log_mask(mr));
+}
+
+/*
+ * If memory region `mr' is NULL, do global sync. Otherwise, sync
+ * dirty bitmap for the specified memory region.
+ */
+static void memory_region_sync_dirty_bitmap(MemoryRegion *mr, bool last_stage)
+{
+ MemoryListener *listener;
+ AddressSpace *as;
+ FlatView *view;
+ FlatRange *fr;
+
+ /* If the same address space has multiple log_sync listeners, we
+ * visit that address space's FlatView multiple times. But because
+ * log_sync listeners are rare, it's still cheaper than walking each
+ * address space once.
+ */
+ QTAILQ_FOREACH(listener, &memory_listeners, link) {
+ if (listener->log_sync) {
+ as = listener->address_space;
+ view = address_space_get_flatview(as);
+ FOR_EACH_FLAT_RANGE(fr, view) {
+ if (fr->dirty_log_mask && (!mr || fr->mr == mr)) {
+ MemoryRegionSection mrs = section_from_flat_range(fr, view);
+ listener->log_sync(listener, &mrs);
+ }
+ }
+ flatview_unref(view);
+ trace_memory_region_sync_dirty(mr ? mr->name : "(all)", listener->name, 0);
+ } else if (listener->log_sync_global) {
+ /*
+             * Whether or not an MR is specified, all we can do here
+             * is a global sync, because this listener cannot sync at
+             * a finer granularity.
+ */
+ listener->log_sync_global(listener, last_stage);
+ trace_memory_region_sync_dirty(mr ? mr->name : "(all)", listener->name, 1);
+ }
+ }
+}
+
+void memory_region_clear_dirty_bitmap(MemoryRegion *mr, hwaddr start,
+ hwaddr len)
+{
+ MemoryRegionSection mrs;
+ MemoryListener *listener;
+ AddressSpace *as;
+ FlatView *view;
+ FlatRange *fr;
+ hwaddr sec_start, sec_end, sec_size;
+
+ QTAILQ_FOREACH(listener, &memory_listeners, link) {
+ if (!listener->log_clear) {
+ continue;
+ }
+ as = listener->address_space;
+ view = address_space_get_flatview(as);
+ FOR_EACH_FLAT_RANGE(fr, view) {
+ if (!fr->dirty_log_mask || fr->mr != mr) {
+ /*
+                 * The clear-dirty-bitmap operation only applies to
+                 * regions that have dirty logging enabled.
+ */
+ continue;
+ }
+
+ mrs = section_from_flat_range(fr, view);
+
+ sec_start = MAX(mrs.offset_within_region, start);
+ sec_end = mrs.offset_within_region + int128_get64(mrs.size);
+ sec_end = MIN(sec_end, start + len);
+
+ if (sec_start >= sec_end) {
+ /*
+ * If this memory region section has no intersection
+ * with the requested range, skip.
+ */
+ continue;
+ }
+
+ /* Valid case; shrink the section if needed */
+ mrs.offset_within_address_space +=
+ sec_start - mrs.offset_within_region;
+ mrs.offset_within_region = sec_start;
+ sec_size = sec_end - sec_start;
+ mrs.size = int128_make64(sec_size);
+ listener->log_clear(listener, &mrs);
+ }
+ flatview_unref(view);
+ }
+}
+
+DirtyBitmapSnapshot *memory_region_snapshot_and_clear_dirty(MemoryRegion *mr,
+ hwaddr addr,
+ hwaddr size,
+ unsigned client)
+{
+ DirtyBitmapSnapshot *snapshot;
+ assert(mr->ram_block);
+ memory_region_sync_dirty_bitmap(mr, false);
+ snapshot = cpu_physical_memory_snapshot_and_clear_dirty(mr, addr, size, client);
+ memory_global_after_dirty_log_sync();
+ return snapshot;
+}
+
+bool memory_region_snapshot_get_dirty(MemoryRegion *mr, DirtyBitmapSnapshot *snap,
+ hwaddr addr, hwaddr size)
+{
+ assert(mr->ram_block);
+ return cpu_physical_memory_snapshot_get_dirty(snap,
+ memory_region_get_ram_addr(mr) + addr, size);
+}
+
+void memory_region_set_readonly(MemoryRegion *mr, bool readonly)
+{
+ if (mr->readonly != readonly) {
+ memory_region_transaction_begin();
+ mr->readonly = readonly;
+ memory_region_update_pending |= mr->enabled;
+ memory_region_transaction_commit();
+ }
+}
+
+void memory_region_set_nonvolatile(MemoryRegion *mr, bool nonvolatile)
+{
+ if (mr->nonvolatile != nonvolatile) {
+ memory_region_transaction_begin();
+ mr->nonvolatile = nonvolatile;
+ memory_region_update_pending |= mr->enabled;
+ memory_region_transaction_commit();
+ }
+}
+
+void memory_region_rom_device_set_romd(MemoryRegion *mr, bool romd_mode)
+{
+ if (mr->romd_mode != romd_mode) {
+ memory_region_transaction_begin();
+ mr->romd_mode = romd_mode;
+ memory_region_update_pending |= mr->enabled;
+ memory_region_transaction_commit();
+ }
+}
+
+void memory_region_reset_dirty(MemoryRegion *mr, hwaddr addr,
+ hwaddr size, unsigned client)
+{
+ assert(mr->ram_block);
+ cpu_physical_memory_test_and_clear_dirty(
+ memory_region_get_ram_addr(mr) + addr, size, client);
+}
+
+int memory_region_get_fd(MemoryRegion *mr)
+{
+ RCU_READ_LOCK_GUARD();
+ while (mr->alias) {
+ mr = mr->alias;
+ }
+ return mr->ram_block->fd;
+}
+
+void *memory_region_get_ram_ptr(MemoryRegion *mr)
+{
+ uint64_t offset = 0;
+
+ RCU_READ_LOCK_GUARD();
+ while (mr->alias) {
+ offset += mr->alias_offset;
+ mr = mr->alias;
+ }
+ assert(mr->ram_block);
+ return qemu_map_ram_ptr(mr->ram_block, offset);
+}
+
+MemoryRegion *memory_region_from_host(void *ptr, ram_addr_t *offset)
+{
+ RAMBlock *block;
+
+ block = qemu_ram_block_from_host(ptr, false, offset);
+ if (!block) {
+ return NULL;
+ }
+
+ return block->mr;
+}
+
+ram_addr_t memory_region_get_ram_addr(MemoryRegion *mr)
+{
+ return mr->ram_block ? mr->ram_block->offset : RAM_ADDR_INVALID;
+}
+
+void memory_region_ram_resize(MemoryRegion *mr, ram_addr_t newsize, Error **errp)
+{
+ assert(mr->ram_block);
+
+ qemu_ram_resize(mr->ram_block, newsize, errp);
+}
+
+void memory_region_msync(MemoryRegion *mr, hwaddr addr, hwaddr size)
+{
+ if (mr->ram_block) {
+ qemu_ram_msync(mr->ram_block, addr, size);
+ }
+}
+
+void memory_region_writeback(MemoryRegion *mr, hwaddr addr, hwaddr size)
+{
+ /*
+     * This might need to be extended to cover
+     * different types of memory regions.
+ */
+ if (mr->dirty_log_mask) {
+ memory_region_msync(mr, addr, size);
+ }
+}
+
+/*
+ * Notify the appropriate memory listeners about the change to a newly
+ * added/removed CoalescedMemoryRange.
+ */
+static void memory_region_update_coalesced_range(MemoryRegion *mr,
+ CoalescedMemoryRange *cmr,
+ bool add)
+{
+ AddressSpace *as;
+ FlatView *view;
+ FlatRange *fr;
+
+ QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
+ view = address_space_get_flatview(as);
+ FOR_EACH_FLAT_RANGE(fr, view) {
+ if (fr->mr == mr) {
+ flat_range_coalesced_io_notify(fr, as, cmr, add);
+ }
+ }
+ flatview_unref(view);
+ }
+}
+
+void memory_region_set_coalescing(MemoryRegion *mr)
+{
+ memory_region_clear_coalescing(mr);
+ memory_region_add_coalescing(mr, 0, int128_get64(mr->size));
+}
+
+void memory_region_add_coalescing(MemoryRegion *mr,
+ hwaddr offset,
+ uint64_t size)
+{
+ CoalescedMemoryRange *cmr = g_malloc(sizeof(*cmr));
+
+ cmr->addr = addrrange_make(int128_make64(offset), int128_make64(size));
+ QTAILQ_INSERT_TAIL(&mr->coalesced, cmr, link);
+ memory_region_update_coalesced_range(mr, cmr, true);
+ memory_region_set_flush_coalesced(mr);
+}
+
+void memory_region_clear_coalescing(MemoryRegion *mr)
+{
+ CoalescedMemoryRange *cmr;
+
+ if (QTAILQ_EMPTY(&mr->coalesced)) {
+ return;
+ }
+
+ qemu_flush_coalesced_mmio_buffer();
+ mr->flush_coalesced_mmio = false;
+
+ while (!QTAILQ_EMPTY(&mr->coalesced)) {
+ cmr = QTAILQ_FIRST(&mr->coalesced);
+ QTAILQ_REMOVE(&mr->coalesced, cmr, link);
+ memory_region_update_coalesced_range(mr, cmr, false);
+ g_free(cmr);
+ }
+}
+
+void memory_region_set_flush_coalesced(MemoryRegion *mr)
+{
+ mr->flush_coalesced_mmio = true;
+}
+
+void memory_region_clear_flush_coalesced(MemoryRegion *mr)
+{
+ qemu_flush_coalesced_mmio_buffer();
+ if (QTAILQ_EMPTY(&mr->coalesced)) {
+ mr->flush_coalesced_mmio = false;
+ }
+}
+
+static bool userspace_eventfd_warning;
+
+void memory_region_add_eventfd(MemoryRegion *mr,
+ hwaddr addr,
+ unsigned size,
+ bool match_data,
+ uint64_t data,
+ EventNotifier *e)
+{
+ MemoryRegionIoeventfd mrfd = {
+ .addr.start = int128_make64(addr),
+ .addr.size = int128_make64(size),
+ .match_data = match_data,
+ .data = data,
+ .e = e,
+ };
+ unsigned i;
+
+ if (kvm_enabled() && (!(kvm_eventfds_enabled() ||
+ userspace_eventfd_warning))) {
+ userspace_eventfd_warning = true;
+ error_report("Using eventfd without MMIO binding in KVM. "
+ "Suboptimal performance expected");
+ }
+
+ if (size) {
+ adjust_endianness(mr, &mrfd.data, size_memop(size) | MO_TE);
+ }
+ memory_region_transaction_begin();
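+ /* Find the insertion point that keeps the ioeventfd array sorted (primarily by address) */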
+ for (i = 0; i < mr->ioeventfd_nb; ++i) {
+ if (memory_region_ioeventfd_before(&mrfd, &mr->ioeventfds[i])) {
+ break;
+ }
+ }
+ ++mr->ioeventfd_nb;
+ mr->ioeventfds = g_realloc(mr->ioeventfds,
+ sizeof(*mr->ioeventfds) * mr->ioeventfd_nb);
+ memmove(&mr->ioeventfds[i+1], &mr->ioeventfds[i],
+ sizeof(*mr->ioeventfds) * (mr->ioeventfd_nb-1 - i));
+ mr->ioeventfds[i] = mrfd;
+ ioeventfd_update_pending |= mr->enabled;
+ memory_region_transaction_commit();
+}
+
+void memory_region_del_eventfd(MemoryRegion *mr,
+ hwaddr addr,
+ unsigned size,
+ bool match_data,
+ uint64_t data,
+ EventNotifier *e)
+{
+ MemoryRegionIoeventfd mrfd = {
+ .addr.start = int128_make64(addr),
+ .addr.size = int128_make64(size),
+ .match_data = match_data,
+ .data = data,
+ .e = e,
+ };
+ unsigned i;
+
+ if (size) {
+ adjust_endianness(mr, &mrfd.data, size_memop(size) | MO_TE);
+ }
+ memory_region_transaction_begin();
+ for (i = 0; i < mr->ioeventfd_nb; ++i) {
+ if (memory_region_ioeventfd_equal(&mrfd, &mr->ioeventfds[i])) {
+ break;
+ }
+ }
+ assert(i != mr->ioeventfd_nb);
+ memmove(&mr->ioeventfds[i], &mr->ioeventfds[i+1],
+ sizeof(*mr->ioeventfds) * (mr->ioeventfd_nb - (i+1)));
+ --mr->ioeventfd_nb;
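+ /*
+ * Shrink the array; the '+ 1' keeps the requested size non-zero when
+ * the last ioeventfd is removed, presumably so that g_realloc() does
+ * not free the array and return NULL.
+ */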
+ mr->ioeventfds = g_realloc(mr->ioeventfds,
+ sizeof(*mr->ioeventfds)*mr->ioeventfd_nb + 1);
+ ioeventfd_update_pending |= mr->enabled;
+ memory_region_transaction_commit();
+}
+
+static void memory_region_update_container_subregions(MemoryRegion *subregion)
+{
+ MemoryRegion *mr = subregion->container;
+ MemoryRegion *other;
+
+ memory_region_transaction_begin();
+
+ memory_region_ref(subregion);
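+ /* Keep the subregion list sorted by descending priority */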
+ QTAILQ_FOREACH(other, &mr->subregions, subregions_link) {
+ if (subregion->priority >= other->priority) {
+ QTAILQ_INSERT_BEFORE(other, subregion, subregions_link);
+ goto done;
+ }
+ }
+ QTAILQ_INSERT_TAIL(&mr->subregions, subregion, subregions_link);
+done:
+ memory_region_update_pending |= mr->enabled && subregion->enabled;
+ memory_region_transaction_commit();
+}
+
+static void memory_region_add_subregion_common(MemoryRegion *mr,
+ hwaddr offset,
+ MemoryRegion *subregion)
+{
+ MemoryRegion *alias;
+
+ assert(!subregion->container);
+ subregion->container = mr;
+ for (alias = subregion->alias; alias; alias = alias->alias) {
+ alias->mapped_via_alias++;
+ }
+ subregion->addr = offset;
+ memory_region_update_container_subregions(subregion);
+}
+
+void memory_region_add_subregion(MemoryRegion *mr,
+ hwaddr offset,
+ MemoryRegion *subregion)
+{
+ subregion->priority = 0;
+ memory_region_add_subregion_common(mr, offset, subregion);
+}
+
+void memory_region_add_subregion_overlap(MemoryRegion *mr,
+ hwaddr offset,
+ MemoryRegion *subregion,
+ int priority)
+{
+ subregion->priority = priority;
+ memory_region_add_subregion_common(mr, offset, subregion);
+}
+
+void memory_region_del_subregion(MemoryRegion *mr,
+ MemoryRegion *subregion)
+{
+ MemoryRegion *alias;
+
+ memory_region_transaction_begin();
+ assert(subregion->container == mr);
+ subregion->container = NULL;
+ for (alias = subregion->alias; alias; alias = alias->alias) {
+ alias->mapped_via_alias--;
+ assert(alias->mapped_via_alias >= 0);
+ }
+ QTAILQ_REMOVE(&mr->subregions, subregion, subregions_link);
+ memory_region_unref(subregion);
+ memory_region_update_pending |= mr->enabled && subregion->enabled;
+ memory_region_transaction_commit();
+}
+
+void memory_region_set_enabled(MemoryRegion *mr, bool enabled)
+{
+ if (enabled == mr->enabled) {
+ return;
+ }
+ memory_region_transaction_begin();
+ mr->enabled = enabled;
+ memory_region_update_pending = true;
+ memory_region_transaction_commit();
+}
+
+void memory_region_set_size(MemoryRegion *mr, uint64_t size)
+{
+ Int128 s = int128_make64(size);
+
+ if (size == UINT64_MAX) {
+ s = int128_2_64();
+ }
+ if (int128_eq(s, mr->size)) {
+ return;
+ }
+ memory_region_transaction_begin();
+ mr->size = s;
+ memory_region_update_pending = true;
+ memory_region_transaction_commit();
+}
+
+static void memory_region_readd_subregion(MemoryRegion *mr)
+{
+ MemoryRegion *container = mr->container;
+
+ if (container) {
+ memory_region_transaction_begin();
+ memory_region_ref(mr);
+ memory_region_del_subregion(container, mr);
+ memory_region_add_subregion_common(container, mr->addr, mr);
+ memory_region_unref(mr);
+ memory_region_transaction_commit();
+ }
+}
+
+void memory_region_set_address(MemoryRegion *mr, hwaddr addr)
+{
+ if (addr != mr->addr) {
+ mr->addr = addr;
+ memory_region_readd_subregion(mr);
+ }
+}
+
+void memory_region_set_alias_offset(MemoryRegion *mr, hwaddr offset)
+{
+ assert(mr->alias);
+
+ if (offset == mr->alias_offset) {
+ return;
+ }
+
+ memory_region_transaction_begin();
+ mr->alias_offset = offset;
+ memory_region_update_pending |= mr->enabled;
+ memory_region_transaction_commit();
+}
+
+uint64_t memory_region_get_alignment(const MemoryRegion *mr)
+{
+ return mr->align;
+}
+
+static int cmp_flatrange_addr(const void *addr_, const void *fr_)
+{
+ const AddrRange *addr = addr_;
+ const FlatRange *fr = fr_;
+
+ if (int128_le(addrrange_end(*addr), fr->addr.start)) {
+ return -1;
+ } else if (int128_ge(addr->start, addrrange_end(fr->addr))) {
+ return 1;
+ }
+ return 0;
+}
+
+static FlatRange *flatview_lookup(FlatView *view, AddrRange addr)
+{
+ return bsearch(&addr, view->ranges, view->nr,
+ sizeof(FlatRange), cmp_flatrange_addr);
+}
+
+bool memory_region_is_mapped(MemoryRegion *mr)
+{
+ return !!mr->container || mr->mapped_via_alias;
+}
+
+/* Same as memory_region_find, but it does not add a reference to the
+ * returned region. It must be called from an RCU critical section.
+ */
+static MemoryRegionSection memory_region_find_rcu(MemoryRegion *mr,
+ hwaddr addr, uint64_t size)
+{
+ MemoryRegionSection ret = { .mr = NULL };
+ MemoryRegion *root;
+ AddressSpace *as;
+ AddrRange range;
+ FlatView *view;
+ FlatRange *fr;
+
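+ /*
+ * Translate addr into the coordinate space of the root region by
+ * walking up the container chain and accumulating the offsets.
+ */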
+ addr += mr->addr;
+ for (root = mr; root->container; ) {
+ root = root->container;
+ addr += root->addr;
+ }
+
+ as = memory_region_to_address_space(root);
+ if (!as) {
+ return ret;
+ }
+ range = addrrange_make(int128_make64(addr), int128_make64(size));
+
+ view = address_space_to_flatview(as);
+ fr = flatview_lookup(view, range);
+ if (!fr) {
+ return ret;
+ }
+
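+ /*
+ * bsearch() may land on any intersecting range; step back to the
+ * first range that still intersects the request.
+ */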
+ while (fr > view->ranges && addrrange_intersects(fr[-1].addr, range)) {
+ --fr;
+ }
+
+ ret.mr = fr->mr;
+ ret.fv = view;
+ range = addrrange_intersection(range, fr->addr);
+ ret.offset_within_region = fr->offset_in_region;
+ ret.offset_within_region += int128_get64(int128_sub(range.start,
+ fr->addr.start));
+ ret.size = range.size;
+ ret.offset_within_address_space = int128_get64(range.start);
+ ret.readonly = fr->readonly;
+ ret.nonvolatile = fr->nonvolatile;
+ return ret;
+}
+
+MemoryRegionSection memory_region_find(MemoryRegion *mr,
+ hwaddr addr, uint64_t size)
+{
+ MemoryRegionSection ret;
+ RCU_READ_LOCK_GUARD();
+ ret = memory_region_find_rcu(mr, addr, size);
+ if (ret.mr) {
+ memory_region_ref(ret.mr);
+ }
+ return ret;
+}
+
+MemoryRegionSection *memory_region_section_new_copy(MemoryRegionSection *s)
+{
+ MemoryRegionSection *tmp = g_new(MemoryRegionSection, 1);
+
+ *tmp = *s;
+ if (tmp->mr) {
+ memory_region_ref(tmp->mr);
+ }
+ if (tmp->fv) {
+ bool ret = flatview_ref(tmp->fv);
+
+ g_assert(ret);
+ }
+ return tmp;
+}
+
+void memory_region_section_free_copy(MemoryRegionSection *s)
+{
+ if (s->fv) {
+ flatview_unref(s->fv);
+ }
+ if (s->mr) {
+ memory_region_unref(s->mr);
+ }
+ g_free(s);
+}
+
+bool memory_region_present(MemoryRegion *container, hwaddr addr)
+{
+ MemoryRegion *mr;
+
+ RCU_READ_LOCK_GUARD();
+ mr = memory_region_find_rcu(container, addr, 1).mr;
+ return mr && mr != container;
+}
+
+void memory_global_dirty_log_sync(bool last_stage)
+{
+ memory_region_sync_dirty_bitmap(NULL, last_stage);
+}
+
+void memory_global_after_dirty_log_sync(void)
+{
+ MEMORY_LISTENER_CALL_GLOBAL(log_global_after_sync, Forward);
+}
+
+/*
+ * Dirty-tracking stop flags that are postponed because the VM is stopped.
+ * Should only be used within the vmstate_change hook.
+ */
+static unsigned int postponed_stop_flags;
+static VMChangeStateEntry *vmstate_change;
+static void memory_global_dirty_log_stop_postponed_run(void);
+
+void memory_global_dirty_log_start(unsigned int flags)
+{
+ unsigned int old_flags;
+
+ assert(flags && !(flags & (~GLOBAL_DIRTY_MASK)));
+
+ if (vmstate_change) {
+ /* If there is a postponed stop(), operate on it first */
+ postponed_stop_flags &= ~flags;
+ memory_global_dirty_log_stop_postponed_run();
+ }
+
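+ /* Only act on flags that are not already being tracked */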
+ flags &= ~global_dirty_tracking;
+ if (!flags) {
+ return;
+ }
+
+ old_flags = global_dirty_tracking;
+ global_dirty_tracking |= flags;
+ trace_global_dirty_changed(global_dirty_tracking);
+
+ if (!old_flags) {
+ MEMORY_LISTENER_CALL_GLOBAL(log_global_start, Forward);
+ memory_region_transaction_begin();
+ memory_region_update_pending = true;
+ memory_region_transaction_commit();
+ }
+}
+
+static void memory_global_dirty_log_do_stop(unsigned int flags)
+{
+ assert(flags && !(flags & (~GLOBAL_DIRTY_MASK)));
+ assert((global_dirty_tracking & flags) == flags);
+ global_dirty_tracking &= ~flags;
+
+ trace_global_dirty_changed(global_dirty_tracking);
+
+ if (!global_dirty_tracking) {
+ memory_region_transaction_begin();
+ memory_region_update_pending = true;
+ memory_region_transaction_commit();
+ MEMORY_LISTENER_CALL_GLOBAL(log_global_stop, Reverse);
+ }
+}
+
+/*
+ * Execute the postponed dirty log stop operations, if any, then reset
+ * everything (including the flags and the vmstate change hook).
+ */
+static void memory_global_dirty_log_stop_postponed_run(void)
+{
+ /* This must be called with the vmstate handler registered */
+ assert(vmstate_change);
+
+ /* Note: postponed_stop_flags can be cleared in the log start routine */
+ if (postponed_stop_flags) {
+ memory_global_dirty_log_do_stop(postponed_stop_flags);
+ postponed_stop_flags = 0;
+ }
+
+ qemu_del_vm_change_state_handler(vmstate_change);
+ vmstate_change = NULL;
+}
+
+static void memory_vm_change_state_handler(void *opaque, bool running,
+ RunState state)
+{
+ if (running) {
+ memory_global_dirty_log_stop_postponed_run();
+ }
+}
+
+void memory_global_dirty_log_stop(unsigned int flags)
+{
+ if (!runstate_is_running()) {
+ /* Postpone the dirty log stop, e.g., to when the VM starts again */
+ if (vmstate_change) {
+ /* Batch with previous postponed flags */
+ postponed_stop_flags |= flags;
+ } else {
+ postponed_stop_flags = flags;
+ vmstate_change = qemu_add_vm_change_state_handler(
+ memory_vm_change_state_handler, NULL);
+ }
+ return;
+ }
+
+ memory_global_dirty_log_do_stop(flags);
+}
+
+static void listener_add_address_space(MemoryListener *listener,
+ AddressSpace *as)
+{
+ FlatView *view;
+ FlatRange *fr;
+
+ if (listener->begin) {
+ listener->begin(listener);
+ }
+ if (global_dirty_tracking) {
+ if (listener->log_global_start) {
+ listener->log_global_start(listener);
+ }
+ }
+
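+ /*
+ * Replay the current topology to the new listener: region_add for
+ * every flat range, plus log_start where dirty logging is active.
+ */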
+ view = address_space_get_flatview(as);
+ FOR_EACH_FLAT_RANGE(fr, view) {
+ MemoryRegionSection section = section_from_flat_range(fr, view);
+
+ if (listener->region_add) {
+ listener->region_add(listener, &section);
+ }
+ if (fr->dirty_log_mask && listener->log_start) {
+ listener->log_start(listener, &section, 0, fr->dirty_log_mask);
+ }
+ }
+ if (listener->commit) {
+ listener->commit(listener);
+ }
+ flatview_unref(view);
+}
+
+static void listener_del_address_space(MemoryListener *listener,
+ AddressSpace *as)
+{
+ FlatView *view;
+ FlatRange *fr;
+
+ if (listener->begin) {
+ listener->begin(listener);
+ }
+ view = address_space_get_flatview(as);
+ FOR_EACH_FLAT_RANGE(fr, view) {
+ MemoryRegionSection section = section_from_flat_range(fr, view);
+
+ if (fr->dirty_log_mask && listener->log_stop) {
+ listener->log_stop(listener, &section, fr->dirty_log_mask, 0);
+ }
+ if (listener->region_del) {
+ listener->region_del(listener, &section);
+ }
+ }
+ if (listener->commit) {
+ listener->commit(listener);
+ }
+ flatview_unref(view);
+}
+
+void memory_listener_register(MemoryListener *listener, AddressSpace *as)
+{
+ MemoryListener *other = NULL;
+
+ /* Only one of them can be defined for a listener */
+ assert(!(listener->log_sync && listener->log_sync_global));
+
+ listener->address_space = as;
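+ /*
+ * Insert the listener into both the global and the per-address-space
+ * lists, keeping each sorted by ascending priority.
+ */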
+ if (QTAILQ_EMPTY(&memory_listeners)
+ || listener->priority >= QTAILQ_LAST(&memory_listeners)->priority) {
+ QTAILQ_INSERT_TAIL(&memory_listeners, listener, link);
+ } else {
+ QTAILQ_FOREACH(other, &memory_listeners, link) {
+ if (listener->priority < other->priority) {
+ break;
+ }
+ }
+ QTAILQ_INSERT_BEFORE(other, listener, link);
+ }
+
+ if (QTAILQ_EMPTY(&as->listeners)
+ || listener->priority >= QTAILQ_LAST(&as->listeners)->priority) {
+ QTAILQ_INSERT_TAIL(&as->listeners, listener, link_as);
+ } else {
+ QTAILQ_FOREACH(other, &as->listeners, link_as) {
+ if (listener->priority < other->priority) {
+ break;
+ }
+ }
+ QTAILQ_INSERT_BEFORE(other, listener, link_as);
+ }
+
+ listener_add_address_space(listener, as);
+
+ if (listener->eventfd_add || listener->eventfd_del) {
+ as->ioeventfd_notifiers++;
+ }
+}
+
+void memory_listener_unregister(MemoryListener *listener)
+{
+ if (!listener->address_space) {
+ return;
+ }
+
+ if (listener->eventfd_add || listener->eventfd_del) {
+ listener->address_space->ioeventfd_notifiers--;
+ }
+
+ listener_del_address_space(listener, listener->address_space);
+ QTAILQ_REMOVE(&memory_listeners, listener, link);
+ QTAILQ_REMOVE(&listener->address_space->listeners, listener, link_as);
+ listener->address_space = NULL;
+}
+
+void address_space_remove_listeners(AddressSpace *as)
+{
+ while (!QTAILQ_EMPTY(&as->listeners)) {
+ memory_listener_unregister(QTAILQ_FIRST(&as->listeners));
+ }
+}
+
+void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name)
+{
+ memory_region_ref(root);
+ as->root = root;
+ as->current_map = NULL;
+ as->ioeventfd_nb = 0;
+ as->ioeventfds = NULL;
+ QTAILQ_INIT(&as->listeners);
+ QTAILQ_INSERT_TAIL(&address_spaces, as, address_spaces_link);
+ as->name = g_strdup(name ? name : "anonymous");
+ address_space_update_topology(as);
+ address_space_update_ioeventfds(as);
+}
+
+static void do_address_space_destroy(AddressSpace *as)
+{
+ assert(QTAILQ_EMPTY(&as->listeners));
+
+ flatview_unref(as->current_map);
+ g_free(as->name);
+ g_free(as->ioeventfds);
+ memory_region_unref(as->root);
+}
+
+void address_space_destroy(AddressSpace *as)
+{
+ MemoryRegion *root = as->root;
+
+ /* Flush out anything from MemoryListeners listening in on this */
+ memory_region_transaction_begin();
+ as->root = NULL;
+ memory_region_transaction_commit();
+ QTAILQ_REMOVE(&address_spaces, as, address_spaces_link);
+
+ /* At this point, as->dispatch and as->current_map are dummy
+ * entries that the guest should never use. Wait for the old
+ * values to expire before freeing the data.
+ */
+ as->root = root;
+ call_rcu(as, do_address_space_destroy, rcu);
+}
+
+static const char *memory_region_type(MemoryRegion *mr)
+{
+ if (mr->alias) {
+ return memory_region_type(mr->alias);
+ }
+ if (memory_region_is_ram_device(mr)) {
+ return "ramd";
+ } else if (memory_region_is_romd(mr)) {
+ return "romd";
+ } else if (memory_region_is_rom(mr)) {
+ return "rom";
+ } else if (memory_region_is_ram(mr)) {
+ return "ram";
+ } else {
+ return "i/o";
+ }
+}
+
+typedef struct MemoryRegionList MemoryRegionList;
+
+struct MemoryRegionList {
+ const MemoryRegion *mr;
+ QTAILQ_ENTRY(MemoryRegionList) mrqueue;
+};
+
+typedef QTAILQ_HEAD(, MemoryRegionList) MemoryRegionListHead;
+
+#define MR_SIZE(size) (int128_nz(size) ? (hwaddr)int128_get64( \
+ int128_sub((size), int128_one())) : 0)
+#define MTREE_INDENT " "
+
+static void mtree_expand_owner(const char *label, Object *obj)
+{
+ DeviceState *dev = (DeviceState *) object_dynamic_cast(obj, TYPE_DEVICE);
+
+ qemu_printf(" %s:{%s", label, dev ? "dev" : "obj");
+ if (dev && dev->id) {
+ qemu_printf(" id=%s", dev->id);
+ } else {
+ char *canonical_path = object_get_canonical_path(obj);
+ if (canonical_path) {
+ qemu_printf(" path=%s", canonical_path);
+ g_free(canonical_path);
+ } else {
+ qemu_printf(" type=%s", object_get_typename(obj));
+ }
+ }
+ qemu_printf("}");
+}
+
+static void mtree_print_mr_owner(const MemoryRegion *mr)
+{
+ Object *owner = mr->owner;
+ Object *parent = memory_region_owner((MemoryRegion *)mr);
+
+ if (!owner && !parent) {
+ qemu_printf(" orphan");
+ return;
+ }
+ if (owner) {
+ mtree_expand_owner("owner", owner);
+ }
+ if (parent && parent != owner) {
+ mtree_expand_owner("parent", parent);
+ }
+}
+
+static void mtree_print_mr(const MemoryRegion *mr, unsigned int level,
+ hwaddr base,
+ MemoryRegionListHead *alias_print_queue,
+ bool owner, bool display_disabled)
+{
+ MemoryRegionList *new_ml, *ml, *next_ml;
+ MemoryRegionListHead submr_print_queue;
+ const MemoryRegion *submr;
+ unsigned int i;
+ hwaddr cur_start, cur_end;
+
+ if (!mr) {
+ return;
+ }
+
+ cur_start = base + mr->addr;
+ cur_end = cur_start + MR_SIZE(mr->size);
+
+ /*
+ * Try to detect overflow of the memory region. This should never
+ * happen normally. When it does, print a marker to warn whoever
+ * is reading the output.
+ */
+ if (cur_start < base || cur_end < cur_start) {
+ qemu_printf("[DETECTED OVERFLOW!] ");
+ }
+
+ if (mr->alias) {
+ bool found = false;
+
+ /* check if the alias is already in the queue */
+ QTAILQ_FOREACH(ml, alias_print_queue, mrqueue) {
+ if (ml->mr == mr->alias) {
+ found = true;
+ }
+ }
+
+ if (!found) {
+ ml = g_new(MemoryRegionList, 1);
+ ml->mr = mr->alias;
+ QTAILQ_INSERT_TAIL(alias_print_queue, ml, mrqueue);
+ }
+ if (mr->enabled || display_disabled) {
+ for (i = 0; i < level; i++) {
+ qemu_printf(MTREE_INDENT);
+ }
+ qemu_printf(HWADDR_FMT_plx "-" HWADDR_FMT_plx
+ " (prio %d, %s%s): alias %s @%s " HWADDR_FMT_plx
+ "-" HWADDR_FMT_plx "%s",
+ cur_start, cur_end,
+ mr->priority,
+ mr->nonvolatile ? "nv-" : "",
+ memory_region_type((MemoryRegion *)mr),
+ memory_region_name(mr),
+ memory_region_name(mr->alias),
+ mr->alias_offset,
+ mr->alias_offset + MR_SIZE(mr->size),
+ mr->enabled ? "" : " [disabled]");
+ if (owner) {
+ mtree_print_mr_owner(mr);
+ }
+ qemu_printf("\n");
+ }
+ } else {
+ if (mr->enabled || display_disabled) {
+ for (i = 0; i < level; i++) {
+ qemu_printf(MTREE_INDENT);
+ }
+ qemu_printf(HWADDR_FMT_plx "-" HWADDR_FMT_plx
+ " (prio %d, %s%s): %s%s",
+ cur_start, cur_end,
+ mr->priority,
+ mr->nonvolatile ? "nv-" : "",
+ memory_region_type((MemoryRegion *)mr),
+ memory_region_name(mr),
+ mr->enabled ? "" : " [disabled]");
+ if (owner) {
+ mtree_print_mr_owner(mr);
+ }
+ qemu_printf("\n");
+ }
+ }
+
+ QTAILQ_INIT(&submr_print_queue);
+
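+ /*
+ * Sort the subregions for printing: by ascending address, with ties
+ * broken by descending priority.
+ */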
+ QTAILQ_FOREACH(submr, &mr->subregions, subregions_link) {
+ new_ml = g_new(MemoryRegionList, 1);
+ new_ml->mr = submr;
+ QTAILQ_FOREACH(ml, &submr_print_queue, mrqueue) {
+ if (new_ml->mr->addr < ml->mr->addr ||
+ (new_ml->mr->addr == ml->mr->addr &&
+ new_ml->mr->priority > ml->mr->priority)) {
+ QTAILQ_INSERT_BEFORE(ml, new_ml, mrqueue);
+ new_ml = NULL;
+ break;
+ }
+ }
+ if (new_ml) {
+ QTAILQ_INSERT_TAIL(&submr_print_queue, new_ml, mrqueue);
+ }
+ }
+
+ QTAILQ_FOREACH(ml, &submr_print_queue, mrqueue) {
+ mtree_print_mr(ml->mr, level + 1, cur_start,
+ alias_print_queue, owner, display_disabled);
+ }
+
+ QTAILQ_FOREACH_SAFE(ml, &submr_print_queue, mrqueue, next_ml) {
+ g_free(ml);
+ }
+}
+
+struct FlatViewInfo {
+ int counter;
+ bool dispatch_tree;
+ bool owner;
+ AccelClass *ac;
+};
+
+static void mtree_print_flatview(gpointer key, gpointer value,
+ gpointer user_data)
+{
+ FlatView *view = key;
+ GArray *fv_address_spaces = value;
+ struct FlatViewInfo *fvi = user_data;
+ FlatRange *range = &view->ranges[0];
+ MemoryRegion *mr;
+ int n = view->nr;
+ int i;
+ AddressSpace *as;
+
+ qemu_printf("FlatView #%d\n", fvi->counter);
+ ++fvi->counter;
+
+ for (i = 0; i < fv_address_spaces->len; ++i) {
+ as = g_array_index(fv_address_spaces, AddressSpace*, i);
+ qemu_printf(" AS \"%s\", root: %s",
+ as->name, memory_region_name(as->root));
+ if (as->root->alias) {
+ qemu_printf(", alias %s", memory_region_name(as->root->alias));
+ }
+ qemu_printf("\n");
+ }
+
+ qemu_printf(" Root memory region: %s\n",
+ view->root ? memory_region_name(view->root) : "(none)");
+
+ if (n <= 0) {
+ qemu_printf(MTREE_INDENT "No rendered FlatView\n\n");
+ return;
+ }
+
+ while (n--) {
+ mr = range->mr;
+ if (range->offset_in_region) {
+ qemu_printf(MTREE_INDENT HWADDR_FMT_plx "-" HWADDR_FMT_plx
+ " (prio %d, %s%s): %s @" HWADDR_FMT_plx,
+ int128_get64(range->addr.start),
+ int128_get64(range->addr.start)
+ + MR_SIZE(range->addr.size),
+ mr->priority,
+ range->nonvolatile ? "nv-" : "",
+ range->readonly ? "rom" : memory_region_type(mr),
+ memory_region_name(mr),
+ range->offset_in_region);
+ } else {
+ qemu_printf(MTREE_INDENT HWADDR_FMT_plx "-" HWADDR_FMT_plx
+ " (prio %d, %s%s): %s",
+ int128_get64(range->addr.start),
+ int128_get64(range->addr.start)
+ + MR_SIZE(range->addr.size),
+ mr->priority,
+ range->nonvolatile ? "nv-" : "",
+ range->readonly ? "rom" : memory_region_type(mr),
+ memory_region_name(mr));
+ }
+ if (fvi->owner) {
+ mtree_print_mr_owner(mr);
+ }
+
+ if (fvi->ac) {
+ for (i = 0; i < fv_address_spaces->len; ++i) {
+ as = g_array_index(fv_address_spaces, AddressSpace*, i);
+ if (fvi->ac->has_memory(current_machine, as,
+ int128_get64(range->addr.start),
+ MR_SIZE(range->addr.size) + 1)) {
+ qemu_printf(" %s", fvi->ac->name);
+ }
+ }
+ }
+ qemu_printf("\n");
+ range++;
+ }
+
+#if !defined(CONFIG_USER_ONLY)
+ if (fvi->dispatch_tree && view->root) {
+ mtree_print_dispatch(view->dispatch, view->root);
+ }
+#endif
+
+ qemu_printf("\n");
+}
+
+static gboolean mtree_info_flatview_free(gpointer key, gpointer value,
+ gpointer user_data)
+{
+ FlatView *view = key;
+ GArray *fv_address_spaces = value;
+
+ g_array_unref(fv_address_spaces);
+ flatview_unref(view);
+
+ return true;
+}
+
+static void mtree_info_flatview(bool dispatch_tree, bool owner)
+{
+ struct FlatViewInfo fvi = {
+ .counter = 0,
+ .dispatch_tree = dispatch_tree,
+ .owner = owner,
+ };
+ AddressSpace *as;
+ FlatView *view;
+ GArray *fv_address_spaces;
+ GHashTable *views = g_hash_table_new(g_direct_hash, g_direct_equal);
+ AccelClass *ac = ACCEL_GET_CLASS(current_accel());
+
+ if (ac->has_memory) {
+ fvi.ac = ac;
+ }
+
+ /* Gather all FVs in one table */
+ QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
+ view = address_space_get_flatview(as);
+
+ fv_address_spaces = g_hash_table_lookup(views, view);
+ if (!fv_address_spaces) {
+ fv_address_spaces = g_array_new(false, false, sizeof(as));
+ g_hash_table_insert(views, view, fv_address_spaces);
+ }
+
+ g_array_append_val(fv_address_spaces, as);
+ }
+
+ /* Print */
+ g_hash_table_foreach(views, mtree_print_flatview, &fvi);
+
+ /* Free */
+ g_hash_table_foreach_remove(views, mtree_info_flatview_free, 0);
+ g_hash_table_unref(views);
+}
+
+struct AddressSpaceInfo {
+ MemoryRegionListHead *ml_head;
+ bool owner;
+ bool disabled;
+};
+
+/* Returns negative value if a < b; zero if a = b; positive value if a > b. */
+static gint address_space_compare_name(gconstpointer a, gconstpointer b)
+{
+ const AddressSpace *as_a = a;
+ const AddressSpace *as_b = b;
+
+ return g_strcmp0(as_a->name, as_b->name);
+}
+
+static void mtree_print_as_name(gpointer data, gpointer user_data)
+{
+ AddressSpace *as = data;
+
+ qemu_printf("address-space: %s\n", as->name);
+}
+
+static void mtree_print_as(gpointer key, gpointer value, gpointer user_data)
+{
+ MemoryRegion *mr = key;
+ GSList *as_same_root_mr_list = value;
+ struct AddressSpaceInfo *asi = user_data;
+
+ g_slist_foreach(as_same_root_mr_list, mtree_print_as_name, NULL);
+ mtree_print_mr(mr, 1, 0, asi->ml_head, asi->owner, asi->disabled);
+ qemu_printf("\n");
+}
+
+static gboolean mtree_info_as_free(gpointer key, gpointer value,
+ gpointer user_data)
+{
+ GSList *as_same_root_mr_list = value;
+
+ g_slist_free(as_same_root_mr_list);
+
+ return true;
+}
+
+static void mtree_info_as(bool dispatch_tree, bool owner, bool disabled)
+{
+ MemoryRegionListHead ml_head;
+ MemoryRegionList *ml, *ml2;
+ AddressSpace *as;
+ GHashTable *views = g_hash_table_new(g_direct_hash, g_direct_equal);
+ GSList *as_same_root_mr_list;
+ struct AddressSpaceInfo asi = {
+ .ml_head = &ml_head,
+ .owner = owner,
+ .disabled = disabled,
+ };
+
+ QTAILQ_INIT(&ml_head);
+
+ QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
+ /* Create hashtable, key=AS root MR, value = list of AS */
+ as_same_root_mr_list = g_hash_table_lookup(views, as->root);
+ as_same_root_mr_list = g_slist_insert_sorted(as_same_root_mr_list, as,
+ address_space_compare_name);
+ g_hash_table_insert(views, as->root, as_same_root_mr_list);
+ }
+
+ /* print address spaces */
+ g_hash_table_foreach(views, mtree_print_as, &asi);
+ g_hash_table_foreach_remove(views, mtree_info_as_free, 0);
+ g_hash_table_unref(views);
+
+ /* print aliased regions */
+ QTAILQ_FOREACH(ml, &ml_head, mrqueue) {
+ qemu_printf("memory-region: %s\n", memory_region_name(ml->mr));
+ mtree_print_mr(ml->mr, 1, 0, &ml_head, owner, disabled);
+ qemu_printf("\n");
+ }
+
+ QTAILQ_FOREACH_SAFE(ml, &ml_head, mrqueue, ml2) {
+ g_free(ml);
+ }
+}
+
+void mtree_info(bool flatview, bool dispatch_tree, bool owner, bool disabled)
+{
+ if (flatview) {
+ mtree_info_flatview(dispatch_tree, owner);
+ } else {
+ mtree_info_as(dispatch_tree, owner, disabled);
+ }
+}
+
+void memory_region_init_ram(MemoryRegion *mr,
+ Object *owner,
+ const char *name,
+ uint64_t size,
+ Error **errp)
+{
+ DeviceState *owner_dev;
+ Error *err = NULL;
+
+ memory_region_init_ram_nomigrate(mr, owner, name, size, &err);
+ if (err) {
+ error_propagate(errp, err);
+ return;
+ }
+ /* This will assert if owner is neither NULL nor a DeviceState.
+ * We only want the owner here for the purposes of defining a
+ * unique name for migration. TODO: Ideally we should implement
+ * a naming scheme for Objects which are not DeviceStates, in
+ * which case we can relax this restriction.
+ */
+ owner_dev = DEVICE(owner);
+ vmstate_register_ram(mr, owner_dev);
+}
+
+void memory_region_init_rom(MemoryRegion *mr,
+ Object *owner,
+ const char *name,
+ uint64_t size,
+ Error **errp)
+{
+ DeviceState *owner_dev;
+ Error *err = NULL;
+
+ memory_region_init_rom_nomigrate(mr, owner, name, size, &err);
+ if (err) {
+ error_propagate(errp, err);
+ return;
+ }
+ /* This will assert if owner is neither NULL nor a DeviceState.
+ * We only want the owner here for the purposes of defining a
+ * unique name for migration. TODO: Ideally we should implement
+ * a naming scheme for Objects which are not DeviceStates, in
+ * which case we can relax this restriction.
+ */
+ owner_dev = DEVICE(owner);
+ vmstate_register_ram(mr, owner_dev);
+}
+
+void memory_region_init_rom_device(MemoryRegion *mr,
+ Object *owner,
+ const MemoryRegionOps *ops,
+ void *opaque,
+ const char *name,
+ uint64_t size,
+ Error **errp)
+{
+ DeviceState *owner_dev;
+ Error *err = NULL;
+
+ memory_region_init_rom_device_nomigrate(mr, owner, ops, opaque,
+ name, size, &err);
+ if (err) {
+ error_propagate(errp, err);
+ return;
+ }
+ /* This will assert if owner is neither NULL nor a DeviceState.
+ * We only want the owner here for the purposes of defining a
+ * unique name for migration. TODO: Ideally we should implement
+ * a naming scheme for Objects which are not DeviceStates, in
+ * which case we can relax this restriction.
+ */
+ owner_dev = DEVICE(owner);
+ vmstate_register_ram(mr, owner_dev);
+}
+
+/*
+ * Support system builds with CONFIG_FUZZ using a weak symbol and a stub for
+ * the fuzz_dma_read_cb callback
+ */
+#ifdef CONFIG_FUZZ
+void __attribute__((weak)) fuzz_dma_read_cb(size_t addr,
+ size_t len,
+ MemoryRegion *mr)
+{
+}
+#endif
+
+static const TypeInfo memory_region_info = {
+ .parent = TYPE_OBJECT,
+ .name = TYPE_MEMORY_REGION,
+ .class_size = sizeof(MemoryRegionClass),
+ .instance_size = sizeof(MemoryRegion),
+ .instance_init = memory_region_initfn,
+ .instance_finalize = memory_region_finalize,
+};
+
+static const TypeInfo iommu_memory_region_info = {
+ .parent = TYPE_MEMORY_REGION,
+ .name = TYPE_IOMMU_MEMORY_REGION,
+ .class_size = sizeof(IOMMUMemoryRegionClass),
+ .instance_size = sizeof(IOMMUMemoryRegion),
+ .instance_init = iommu_memory_region_initfn,
+ .abstract = true,
+};
+
+static const TypeInfo ram_discard_manager_info = {
+ .parent = TYPE_INTERFACE,
+ .name = TYPE_RAM_DISCARD_MANAGER,
+ .class_size = sizeof(RamDiscardManagerClass),
+};
+
+static void memory_register_types(void)
+{
+ type_register_static(&memory_region_info);
+ type_register_static(&iommu_memory_region_info);
+ type_register_static(&ram_discard_manager_info);
+}
+
+type_init(memory_register_types)
diff --git a/system/memory_mapping.c b/system/memory_mapping.c
new file mode 100644
index 0000000..d7f1d09
--- /dev/null
+++ b/system/memory_mapping.c
@@ -0,0 +1,377 @@
+/*
+ * QEMU memory mapping
+ *
+ * Copyright Fujitsu, Corp. 2011, 2012
+ *
+ * Authors:
+ * Wen Congyang <wency@cn.fujitsu.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+
+#include "sysemu/memory_mapping.h"
+#include "exec/memory.h"
+#include "exec/address-spaces.h"
+#include "hw/core/cpu.h"
+
+//#define DEBUG_GUEST_PHYS_REGION_ADD
+
+static void memory_mapping_list_add_mapping_sorted(MemoryMappingList *list,
+ MemoryMapping *mapping)
+{
+ MemoryMapping *p;
+
+ QTAILQ_FOREACH(p, &list->head, next) {
+ if (p->phys_addr >= mapping->phys_addr) {
+ QTAILQ_INSERT_BEFORE(p, mapping, next);
+ return;
+ }
+ }
+ QTAILQ_INSERT_TAIL(&list->head, mapping, next);
+}
+
+static void create_new_memory_mapping(MemoryMappingList *list,
+ hwaddr phys_addr,
+ hwaddr virt_addr,
+ ram_addr_t length)
+{
+ MemoryMapping *memory_mapping;
+
+ memory_mapping = g_new(MemoryMapping, 1);
+ memory_mapping->phys_addr = phys_addr;
+ memory_mapping->virt_addr = virt_addr;
+ memory_mapping->length = length;
+ list->last_mapping = memory_mapping;
+ list->num++;
+ memory_mapping_list_add_mapping_sorted(list, memory_mapping);
+}
+
+static inline bool mapping_contiguous(MemoryMapping *map,
+ hwaddr phys_addr,
+ hwaddr virt_addr)
+{
+ return phys_addr == map->phys_addr + map->length &&
+ virt_addr == map->virt_addr + map->length;
+}
+
+/*
+ * Do [map->phys_addr, map->phys_addr + map->length) and
+ * [phys_addr, phys_addr + length) intersect?
+ */
+static inline bool mapping_have_same_region(MemoryMapping *map,
+ hwaddr phys_addr,
+ ram_addr_t length)
+{
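+ /*
+ * Note the '<' rather than '<=': a region ending exactly at
+ * map->phys_addr is also treated as intersecting, presumably so
+ * that it can still be merged with its neighbour.
+ */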
+ return !(phys_addr + length < map->phys_addr ||
+ phys_addr >= map->phys_addr + map->length);
+}
+
+/*
+ * [map->phys_addr, map->phys_addr + map->length) and
+ * [phys_addr, phys_addr + length) intersect. Do the virtual addresses
+ * in the intersection conflict (i.e. differ)?
+ */
+static inline bool mapping_conflict(MemoryMapping *map,
+ hwaddr phys_addr,
+ hwaddr virt_addr)
+{
+ return virt_addr - map->virt_addr != phys_addr - map->phys_addr;
+}
+
+/*
+ * [map->virt_addr, map->virt_addr + map->length) and
+ * [virt_addr, virt_addr + length) intersect, and the physical addresses
+ * in the intersection are the same.
+ */
+static inline void mapping_merge(MemoryMapping *map,
+ hwaddr virt_addr,
+ ram_addr_t length)
+{
+ if (virt_addr < map->virt_addr) {
+ map->length += map->virt_addr - virt_addr;
+ map->virt_addr = virt_addr;
+ }
+
+ if ((virt_addr + length) >
+ (map->virt_addr + map->length)) {
+ map->length = virt_addr + length - map->virt_addr;
+ }
+}
+
+void memory_mapping_list_add_merge_sorted(MemoryMappingList *list,
+ hwaddr phys_addr,
+ hwaddr virt_addr,
+ ram_addr_t length)
+{
+ MemoryMapping *memory_mapping, *last_mapping;
+
+ if (QTAILQ_EMPTY(&list->head)) {
+ create_new_memory_mapping(list, phys_addr, virt_addr, length);
+ return;
+ }
+
+ last_mapping = list->last_mapping;
+ if (last_mapping) {
+ if (mapping_contiguous(last_mapping, phys_addr, virt_addr)) {
+ last_mapping->length += length;
+ return;
+ }
+ }
+
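+ /*
+ * Slow path: scan the sorted list for a mapping this region can
+ * extend or merge with.
+ */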
+ QTAILQ_FOREACH(memory_mapping, &list->head, next) {
+ if (mapping_contiguous(memory_mapping, phys_addr, virt_addr)) {
+ memory_mapping->length += length;
+ list->last_mapping = memory_mapping;
+ return;
+ }
+
+ if (phys_addr + length < memory_mapping->phys_addr) {
+ /* create a new region before memory_mapping */
+ break;
+ }
+
+ if (mapping_have_same_region(memory_mapping, phys_addr, length)) {
+ if (mapping_conflict(memory_mapping, phys_addr, virt_addr)) {
+ continue;
+ }
+
+ /* merge this region into memory_mapping */
+ mapping_merge(memory_mapping, virt_addr, length);
+ list->last_mapping = memory_mapping;
+ return;
+ }
+ }
+
+ /* this region cannot be merged into any existing memory mapping. */
+ create_new_memory_mapping(list, phys_addr, virt_addr, length);
+}
+
+void memory_mapping_list_free(MemoryMappingList *list)
+{
+ MemoryMapping *p, *q;
+
+ QTAILQ_FOREACH_SAFE(p, &list->head, next, q) {
+ QTAILQ_REMOVE(&list->head, p, next);
+ g_free(p);
+ }
+
+ list->num = 0;
+ list->last_mapping = NULL;
+}
+
+void memory_mapping_list_init(MemoryMappingList *list)
+{
+ list->num = 0;
+ list->last_mapping = NULL;
+ QTAILQ_INIT(&list->head);
+}
+
+void guest_phys_blocks_free(GuestPhysBlockList *list)
+{
+ GuestPhysBlock *p, *q;
+
+ QTAILQ_FOREACH_SAFE(p, &list->head, next, q) {
+ QTAILQ_REMOVE(&list->head, p, next);
+ memory_region_unref(p->mr);
+ g_free(p);
+ }
+ list->num = 0;
+}
+
+void guest_phys_blocks_init(GuestPhysBlockList *list)
+{
+ list->num = 0;
+ QTAILQ_INIT(&list->head);
+}
+
+typedef struct GuestPhysListener {
+ GuestPhysBlockList *list;
+ MemoryListener listener;
+} GuestPhysListener;
+
+static void guest_phys_block_add_section(GuestPhysListener *g,
+ MemoryRegionSection *section)
+{
+ const hwaddr target_start = section->offset_within_address_space;
+ const hwaddr target_end = target_start + int128_get64(section->size);
+ uint8_t *host_addr = memory_region_get_ram_ptr(section->mr) +
+ section->offset_within_region;
+ GuestPhysBlock *predecessor = NULL;
+
+ /* find continuity in guest physical address space */
+ if (!QTAILQ_EMPTY(&g->list->head)) {
+ hwaddr predecessor_size;
+
+ predecessor = QTAILQ_LAST(&g->list->head);
+ predecessor_size = predecessor->target_end - predecessor->target_start;
+
+ /* the memory API guarantees monotonically increasing traversal */
+ g_assert(predecessor->target_end <= target_start);
+
+ /* we want continuity in both guest-physical and host-virtual memory */
+ if (predecessor->target_end < target_start ||
+ predecessor->host_addr + predecessor_size != host_addr ||
+ predecessor->mr != section->mr) {
+ predecessor = NULL;
+ }
+ }
+
+ if (predecessor == NULL) {
+ /* isolated mapping, allocate it and add it to the list */
+ GuestPhysBlock *block = g_malloc0(sizeof *block);
+
+ block->target_start = target_start;
+ block->target_end = target_end;
+ block->host_addr = host_addr;
+ block->mr = section->mr;
+ memory_region_ref(section->mr);
+
+ QTAILQ_INSERT_TAIL(&g->list->head, block, next);
+ ++g->list->num;
+ } else {
+ /* expand predecessor until @target_end; predecessor's start doesn't
+ * change
+ */
+ predecessor->target_end = target_end;
+ }
+
+#ifdef DEBUG_GUEST_PHYS_REGION_ADD
+ fprintf(stderr, "%s: target_start=" HWADDR_FMT_plx " target_end="
+ HWADDR_FMT_plx ": %s (count: %u)\n", __func__, target_start,
+ target_end, predecessor ? "joined" : "added", g->list->num);
+#endif
+}
+
+static int guest_phys_ram_populate_cb(MemoryRegionSection *section,
+ void *opaque)
+{
+ GuestPhysListener *g = opaque;
+
+ guest_phys_block_add_section(g, section);
+ return 0;
+}
+
+static void guest_phys_blocks_region_add(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ GuestPhysListener *g = container_of(listener, GuestPhysListener, listener);
+
+ /* we only care about RAM */
+ if (!memory_region_is_ram(section->mr) ||
+ memory_region_is_ram_device(section->mr) ||
+ memory_region_is_nonvolatile(section->mr)) {
+ return;
+ }
+
+ /* for special sparse regions, only add populated parts */
+ if (memory_region_has_ram_discard_manager(section->mr)) {
+ RamDiscardManager *rdm;
+
+ rdm = memory_region_get_ram_discard_manager(section->mr);
+ ram_discard_manager_replay_populated(rdm, section,
+ guest_phys_ram_populate_cb, g);
+ return;
+ }
+
+ guest_phys_block_add_section(g, section);
+}
+
+void guest_phys_blocks_append(GuestPhysBlockList *list)
+{
+ GuestPhysListener g = { 0 };
+
+ g.list = list;
+ g.listener.region_add = &guest_phys_blocks_region_add;
+ memory_listener_register(&g.listener, &address_space_memory);
+ memory_listener_unregister(&g.listener);
+}
+
+static CPUState *find_paging_enabled_cpu(CPUState *start_cpu)
+{
+ CPUState *cpu;
+
+ CPU_FOREACH(cpu) {
+ if (cpu_paging_enabled(cpu)) {
+ return cpu;
+ }
+ }
+
+ return NULL;
+}
+
+void qemu_get_guest_memory_mapping(MemoryMappingList *list,
+ const GuestPhysBlockList *guest_phys_blocks,
+ Error **errp)
+{
+ CPUState *cpu, *first_paging_enabled_cpu;
+ GuestPhysBlock *block;
+ ram_addr_t offset, length;
+
+ first_paging_enabled_cpu = find_paging_enabled_cpu(first_cpu);
+ if (first_paging_enabled_cpu) {
+ for (cpu = first_paging_enabled_cpu; cpu != NULL;
+ cpu = CPU_NEXT(cpu)) {
+ Error *err = NULL;
+ cpu_get_memory_mapping(cpu, list, &err);
+ if (err) {
+ error_propagate(errp, err);
+ return;
+ }
+ }
+ return;
+ }
+
+ /*
+ * If the guest doesn't use paging, the virtual address is equal to
+ * the physical address.
+ */
+ QTAILQ_FOREACH(block, &guest_phys_blocks->head, next) {
+ offset = block->target_start;
+ length = block->target_end - block->target_start;
+ create_new_memory_mapping(list, offset, offset, length);
+ }
+}
+
+void qemu_get_guest_simple_memory_mapping(MemoryMappingList *list,
+ const GuestPhysBlockList *guest_phys_blocks)
+{
+ GuestPhysBlock *block;
+
+ QTAILQ_FOREACH(block, &guest_phys_blocks->head, next) {
+ create_new_memory_mapping(list, block->target_start, 0,
+ block->target_end - block->target_start);
+ }
+}
+
+void memory_mapping_filter(MemoryMappingList *list, int64_t begin,
+ int64_t length)
+{
+ MemoryMapping *cur, *next;
+
+ QTAILQ_FOREACH_SAFE(cur, &list->head, next, next) {
+ if (cur->phys_addr >= begin + length ||
+ cur->phys_addr + cur->length <= begin) {
+ QTAILQ_REMOVE(&list->head, cur, next);
+ g_free(cur);
+ list->num--;
+ continue;
+ }
+
+ if (cur->phys_addr < begin) {
+ cur->length -= begin - cur->phys_addr;
+ if (cur->virt_addr) {
+ cur->virt_addr += begin - cur->phys_addr;
+ }
+ cur->phys_addr = begin;
+ }
+
+ if (cur->phys_addr + cur->length > begin + length) {
+ cur->length -= cur->phys_addr + cur->length - begin - length;
+ }
+ }
+}
diff --git a/system/meson.build b/system/meson.build
new file mode 100644
index 0000000..3a64dd8
--- /dev/null
+++ b/system/meson.build
@@ -0,0 +1,36 @@
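+# A note on the two source sets (based on QEMU's build conventions):
+# files in specific_ss are compiled once per target, while files in
+# system_ss are compiled once and shared by all system emulation targets.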
+specific_ss.add(when: 'CONFIG_SYSTEM_ONLY', if_true: [files(
+ 'arch_init.c',
+ 'ioport.c',
+ 'memory.c',
+ 'physmem.c',
+ 'watchpoint.c',
+)])
+
+system_ss.add(files(
+ 'balloon.c',
+ 'bootdevice.c',
+ 'cpus.c',
+ 'cpu-throttle.c',
+ 'cpu-timers.c',
+ 'datadir.c',
+ 'dirtylimit.c',
+ 'dma-helpers.c',
+ 'globals.c',
+ 'memory_mapping.c',
+ 'qdev-monitor.c',
+ 'qtest.c',
+ 'rtc.c',
+ 'runstate-action.c',
+ 'runstate-hmp-cmds.c',
+ 'runstate.c',
+ 'tpm-hmp-cmds.c',
+ 'vl.c',
+), sdl, libpmem, libdaxctl)
+
+if have_tpm
+ system_ss.add(files('tpm.c'))
+endif
+
+system_ss.add(when: seccomp, if_true: files('qemu-seccomp.c'))
+system_ss.add(when: fdt, if_true: files('device_tree.c'))
+system_ss.add(when: 'CONFIG_LINUX', if_true: files('async-teardown.c'))
diff --git a/system/physmem.c b/system/physmem.c
new file mode 100644
index 0000000..edc3ed8
--- /dev/null
+++ b/system/physmem.c
@@ -0,0 +1,3796 @@
+/*
+ * RAM allocation and memory access
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "exec/page-vary.h"
+#include "qapi/error.h"
+
+#include "qemu/cutils.h"
+#include "qemu/cacheflush.h"
+#include "qemu/hbitmap.h"
+#include "qemu/madvise.h"
+
+#ifdef CONFIG_TCG
+#include "hw/core/tcg-cpu-ops.h"
+#endif /* CONFIG_TCG */
+
+#include "exec/exec-all.h"
+#include "exec/target_page.h"
+#include "hw/qdev-core.h"
+#include "hw/qdev-properties.h"
+#include "hw/boards.h"
+#include "hw/xen/xen.h"
+#include "sysemu/kvm.h"
+#include "sysemu/tcg.h"
+#include "sysemu/qtest.h"
+#include "qemu/timer.h"
+#include "qemu/config-file.h"
+#include "qemu/error-report.h"
+#include "qemu/qemu-print.h"
+#include "qemu/log.h"
+#include "qemu/memalign.h"
+#include "exec/memory.h"
+#include "exec/ioport.h"
+#include "sysemu/dma.h"
+#include "sysemu/hostmem.h"
+#include "sysemu/hw_accel.h"
+#include "sysemu/xen-mapcache.h"
+#include "trace/trace-root.h"
+
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+#include <linux/falloc.h>
+#endif
+
+#include "qemu/rcu_queue.h"
+#include "qemu/main-loop.h"
+#include "exec/translate-all.h"
+#include "sysemu/replay.h"
+
+#include "exec/memory-internal.h"
+#include "exec/ram_addr.h"
+
+#include "qemu/pmem.h"
+
+#include "migration/vmstate.h"
+
+#include "qemu/range.h"
+#ifndef _WIN32
+#include "qemu/mmap-alloc.h"
+#endif
+
+#include "monitor/monitor.h"
+
+#ifdef CONFIG_LIBDAXCTL
+#include <daxctl/libdaxctl.h>
+#endif
+
+//#define DEBUG_SUBPAGE
+
+/* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
+ * are protected by the ramlist lock.
+ */
+RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
+
+static MemoryRegion *system_memory;
+static MemoryRegion *system_io;
+
+AddressSpace address_space_io;
+AddressSpace address_space_memory;
+
+static MemoryRegion io_mem_unassigned;
+
+typedef struct PhysPageEntry PhysPageEntry;
+
+struct PhysPageEntry {
+ /* How many bits to skip to the next level (in units of P_L2_BITS). 0 for a leaf. */
+ uint32_t skip : 6;
+ /* index into phys_sections (!skip) or phys_map_nodes (skip) */
+ uint32_t ptr : 26;
+};
+
+#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
+
+/* Size of the L2 (and L3, etc) page tables. */
+#define ADDR_SPACE_BITS 64
+
+#define P_L2_BITS 9
+#define P_L2_SIZE (1 << P_L2_BITS)
+
+#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
+
+typedef PhysPageEntry Node[P_L2_SIZE];
+
+typedef struct PhysPageMap {
+ struct rcu_head rcu;
+
+ unsigned sections_nb;
+ unsigned sections_nb_alloc;
+ unsigned nodes_nb;
+ unsigned nodes_nb_alloc;
+ Node *nodes;
+ MemoryRegionSection *sections;
+} PhysPageMap;
+
+struct AddressSpaceDispatch {
+ MemoryRegionSection *mru_section;
+ /* This is a multi-level map on the physical address space.
+ * The bottom level has pointers to MemoryRegionSections.
+ */
+ PhysPageEntry phys_map;
+ PhysPageMap map;
+};
+
+#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
+typedef struct subpage_t {
+ MemoryRegion iomem;
+ FlatView *fv;
+ hwaddr base;
+ uint16_t sub_section[];
+} subpage_t;
+
+#define PHYS_SECTION_UNASSIGNED 0
+
+static void io_mem_init(void);
+static void memory_map_init(void);
+static void tcg_log_global_after_sync(MemoryListener *listener);
+static void tcg_commit(MemoryListener *listener);
+
+/**
+ * CPUAddressSpace: all the information a CPU needs about an AddressSpace
+ * @cpu: the CPU whose AddressSpace this is
+ * @as: the AddressSpace itself
+ * @memory_dispatch: its dispatch pointer (cached, RCU protected)
+ * @tcg_as_listener: listener for tracking changes to the AddressSpace
+ */
+struct CPUAddressSpace {
+ CPUState *cpu;
+ AddressSpace *as;
+ struct AddressSpaceDispatch *memory_dispatch;
+ MemoryListener tcg_as_listener;
+};
+
+struct DirtyBitmapSnapshot {
+ ram_addr_t start;
+ ram_addr_t end;
+ unsigned long dirty[];
+};
+
+static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
+{
+ static unsigned alloc_hint = 16;
+ if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
+ map->nodes_nb_alloc = MAX(alloc_hint, map->nodes_nb + nodes);
+ map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
+ alloc_hint = map->nodes_nb_alloc;
+ }
+}
+
+static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
+{
+ unsigned i;
+ uint32_t ret;
+ PhysPageEntry e;
+ PhysPageEntry *p;
+
+ ret = map->nodes_nb++;
+ p = map->nodes[ret];
+ assert(ret != PHYS_MAP_NODE_NIL);
+ assert(ret != map->nodes_nb_alloc);
+
+ e.skip = leaf ? 0 : 1;
+ e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
+ for (i = 0; i < P_L2_SIZE; ++i) {
+ memcpy(&p[i], &e, sizeof(e));
+ }
+ return ret;
+}
+
+static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
+ hwaddr *index, uint64_t *nb, uint16_t leaf,
+ int level)
+{
+ PhysPageEntry *p;
+ hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
+
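+ /* 'step' is the number of target pages covered by one entry at this level */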
+ if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
+ lp->ptr = phys_map_node_alloc(map, level == 0);
+ }
+ p = map->nodes[lp->ptr];
+ lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
+
+ while (*nb && lp < &p[P_L2_SIZE]) {
+ if ((*index & (step - 1)) == 0 && *nb >= step) {
+ lp->skip = 0;
+ lp->ptr = leaf;
+ *index += step;
+ *nb -= step;
+ } else {
+ phys_page_set_level(map, lp, index, nb, leaf, level - 1);
+ }
+ ++lp;
+ }
+}
+
+static void phys_page_set(AddressSpaceDispatch *d,
+ hwaddr index, uint64_t nb,
+ uint16_t leaf)
+{
+ /* Wildly overreserve - it doesn't matter much. */
+ phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
+
+ phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
+}
+
+/* Compact a non-leaf page entry: if the entry has a single child,
+ * update our entry so we can skip it and go directly to the destination.
+ */
+static void phys_page_compact(PhysPageEntry *lp, Node *nodes)
+{
+ unsigned valid_ptr = P_L2_SIZE;
+ int valid = 0;
+ PhysPageEntry *p;
+ int i;
+
+ if (lp->ptr == PHYS_MAP_NODE_NIL) {
+ return;
+ }
+
+ p = nodes[lp->ptr];
+ for (i = 0; i < P_L2_SIZE; i++) {
+ if (p[i].ptr == PHYS_MAP_NODE_NIL) {
+ continue;
+ }
+
+ valid_ptr = i;
+ valid++;
+ if (p[i].skip) {
+ phys_page_compact(&p[i], nodes);
+ }
+ }
+
+ /* We can only compress if there's only one child. */
+ if (valid != 1) {
+ return;
+ }
+
+ assert(valid_ptr < P_L2_SIZE);
+
+ /* Don't compress if it won't fit in the # of bits we have. */
+ if (P_L2_LEVELS >= (1 << 6) &&
+ lp->skip + p[valid_ptr].skip >= (1 << 6)) {
+ return;
+ }
+
+ lp->ptr = p[valid_ptr].ptr;
+ if (!p[valid_ptr].skip) {
+ /* If our only child is a leaf, make this a leaf. */
+ /* By design, we should have made this node a leaf to begin with so we
+ * should never reach here.
+ * But since it's so simple to handle this, let's do it just in case we
+ * change this rule.
+ */
+ lp->skip = 0;
+ } else {
+ lp->skip += p[valid_ptr].skip;
+ }
+}
+
+void address_space_dispatch_compact(AddressSpaceDispatch *d)
+{
+ if (d->phys_map.skip) {
+ phys_page_compact(&d->phys_map, d->map.nodes);
+ }
+}
+
+static inline bool section_covers_addr(const MemoryRegionSection *section,
+ hwaddr addr)
+{
+ /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
+ * the section must cover the entire address space.
+ */
+ return int128_gethi(section->size) ||
+ range_covers_byte(section->offset_within_address_space,
+ int128_getlo(section->size), addr);
+}
+
+static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr addr)
+{
+ PhysPageEntry lp = d->phys_map, *p;
+ Node *nodes = d->map.nodes;
+ MemoryRegionSection *sections = d->map.sections;
+ hwaddr index = addr >> TARGET_PAGE_BITS;
+ int i;
+
+ for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
+ if (lp.ptr == PHYS_MAP_NODE_NIL) {
+ return &sections[PHYS_SECTION_UNASSIGNED];
+ }
+ p = nodes[lp.ptr];
+ lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
+ }
+
+ if (section_covers_addr(&sections[lp.ptr], addr)) {
+ return &sections[lp.ptr];
+ } else {
+ return &sections[PHYS_SECTION_UNASSIGNED];
+ }
+}
+
+/* Called from RCU critical section */
+static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
+ hwaddr addr,
+ bool resolve_subpage)
+{
+ MemoryRegionSection *section = qatomic_read(&d->mru_section);
+ subpage_t *subpage;
+
+ if (!section || section == &d->map.sections[PHYS_SECTION_UNASSIGNED] ||
+ !section_covers_addr(section, addr)) {
+ section = phys_page_find(d, addr);
+ qatomic_set(&d->mru_section, section);
+ }
+ if (resolve_subpage && section->mr->subpage) {
+ subpage = container_of(section->mr, subpage_t, iomem);
+ section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
+ }
+ return section;
+}
+
+/* Called from RCU critical section */
+static MemoryRegionSection *
+address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
+ hwaddr *plen, bool resolve_subpage)
+{
+ MemoryRegionSection *section;
+ MemoryRegion *mr;
+ Int128 diff;
+
+ section = address_space_lookup_region(d, addr, resolve_subpage);
+ /* Compute offset within MemoryRegionSection */
+ addr -= section->offset_within_address_space;
+
+ /* Compute offset within MemoryRegion */
+ *xlat = addr + section->offset_within_region;
+
+ mr = section->mr;
+
+ /* MMIO registers can be expected to perform full-width accesses based only
+ * on their address, without considering adjacent registers that could
+ * decode to completely different MemoryRegions. When such registers
+ * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
+ * regions overlap wildly. For this reason we cannot clamp the accesses
+ * here.
+ *
+ * If the length is small (as is the case for address_space_ldl/stl),
+ * everything works fine. If the incoming length is large, however,
+ * the caller really has to do the clamping through memory_access_size.
+ */
+ if (memory_region_is_ram(mr)) {
+ diff = int128_sub(section->size, int128_make64(addr));
+ *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
+ }
+ return section;
+}
+
+/**
+ * address_space_translate_iommu - translate an address through an IOMMU
+ * memory region and then through the target address space.
+ *
+ * @iommu_mr: the IOMMU memory region that we start the translation from
+ * @addr: the address to be translated through the MMU
+ * @xlat: the translated address offset within the destination memory region.
+ * It cannot be %NULL.
+ * @plen_out: valid read/write length of the translated address. It
+ * cannot be %NULL.
+ * @page_mask_out: page mask for the translated address. This is
+ * only meaningful for IOMMU translated addresses, since the
+ * IOMMU may map huge pages and only the mask reveals the
+ * actual page size. It can be %NULL if we don't care about it.
+ * @is_write: whether the translation operation is for write
+ * @is_mmio: whether this can be MMIO, set true if it can
+ * @target_as: the address space targeted by the IOMMU
+ * @attrs: transaction attributes
+ *
+ * This function is called from an RCU critical section. It is the common
+ * part of flatview_do_translate and address_space_translate_cached.
+ */
+static MemoryRegionSection address_space_translate_iommu(IOMMUMemoryRegion *iommu_mr,
+ hwaddr *xlat,
+ hwaddr *plen_out,
+ hwaddr *page_mask_out,
+ bool is_write,
+ bool is_mmio,
+ AddressSpace **target_as,
+ MemTxAttrs attrs)
+{
+ MemoryRegionSection *section;
+ hwaddr page_mask = (hwaddr)-1;
+
+ do {
+ hwaddr addr = *xlat;
+ IOMMUMemoryRegionClass *imrc = memory_region_get_iommu_class_nocheck(iommu_mr);
+ int iommu_idx = 0;
+ IOMMUTLBEntry iotlb;
+
+ if (imrc->attrs_to_index) {
+ iommu_idx = imrc->attrs_to_index(iommu_mr, attrs);
+ }
+
+ iotlb = imrc->translate(iommu_mr, addr, is_write ?
+ IOMMU_WO : IOMMU_RO, iommu_idx);
+
+ if (!(iotlb.perm & (1 << is_write))) {
+ goto unassigned;
+ }
+
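+ /*
+ * Combine the translated page address with the offset within the
+ * page, clamp the length to the page, and continue the walk in the
+ * target address space, which may be behind yet another IOMMU.
+ */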
+ addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
+ | (addr & iotlb.addr_mask));
+ page_mask &= iotlb.addr_mask;
+ *plen_out = MIN(*plen_out, (addr | iotlb.addr_mask) - addr + 1);
+ *target_as = iotlb.target_as;
+
+ section = address_space_translate_internal(
+ address_space_to_dispatch(iotlb.target_as), addr, xlat,
+ plen_out, is_mmio);
+
+ iommu_mr = memory_region_get_iommu(section->mr);
+ } while (unlikely(iommu_mr));
+
+ if (page_mask_out) {
+ *page_mask_out = page_mask;
+ }
+ return *section;
+
+unassigned:
+ return (MemoryRegionSection) { .mr = &io_mem_unassigned };
+}
+
+/**
+ * flatview_do_translate - translate an address in FlatView
+ *
+ * @fv: the flat view that we want to translate on
+ * @addr: the address to be translated in above address space
+ * @xlat: the translated address offset within memory region. It
+ * cannot be @NULL.
+ * @plen_out: valid read/write length of the translated address. It
+ * can be @NULL when we don't care about it.
+ * @page_mask_out: page mask for the translated address. This is
+ * only meaningful for IOMMU translated addresses, since the
+ * IOMMU may map huge pages and only the mask reveals the
+ * actual page size. It can be @NULL if we don't care about it.
+ * @is_write: whether the translation operation is for write
+ * @is_mmio: whether this can be MMIO, set true if it can
+ * @target_as: the address space targeted by the IOMMU
+ * @attrs: memory transaction attributes
+ *
+ * This function is called from an RCU critical section
+ */
+static MemoryRegionSection flatview_do_translate(FlatView *fv,
+ hwaddr addr,
+ hwaddr *xlat,
+ hwaddr *plen_out,
+ hwaddr *page_mask_out,
+ bool is_write,
+ bool is_mmio,
+ AddressSpace **target_as,
+ MemTxAttrs attrs)
+{
+ MemoryRegionSection *section;
+ IOMMUMemoryRegion *iommu_mr;
+ hwaddr plen = (hwaddr)(-1);
+
+ if (!plen_out) {
+ plen_out = &plen;
+ }
+
+ section = address_space_translate_internal(
+ flatview_to_dispatch(fv), addr, xlat,
+ plen_out, is_mmio);
+
+ iommu_mr = memory_region_get_iommu(section->mr);
+ if (unlikely(iommu_mr)) {
+ return address_space_translate_iommu(iommu_mr, xlat,
+ plen_out, page_mask_out,
+ is_write, is_mmio,
+ target_as, attrs);
+ }
+ if (page_mask_out) {
+ /* Not behind an IOMMU, use default page size. */
+ *page_mask_out = ~TARGET_PAGE_MASK;
+ }
+
+ return *section;
+}
+
+/* Called from RCU critical section */
+IOMMUTLBEntry address_space_get_iotlb_entry(AddressSpace *as, hwaddr addr,
+ bool is_write, MemTxAttrs attrs)
+{
+ MemoryRegionSection section;
+ hwaddr xlat, page_mask;
+
+ /*
+ * This can never be MMIO; we don't really care about plen,
+ * only about the page mask.
+ */
+ section = flatview_do_translate(address_space_to_flatview(as), addr, &xlat,
+ NULL, &page_mask, is_write, false, &as,
+ attrs);
+
+ /* Illegal translation */
+ if (section.mr == &io_mem_unassigned) {
+ goto iotlb_fail;
+ }
+
+ /* Convert memory region offset into address space offset */
+ xlat += section.offset_within_address_space -
+ section.offset_within_region;
+
+ return (IOMMUTLBEntry) {
+ .target_as = as,
+ .iova = addr & ~page_mask,
+ .translated_addr = xlat & ~page_mask,
+ .addr_mask = page_mask,
+ /* IOTLBs are for DMA, and DMA is only allowed on RAM. */
+ .perm = IOMMU_RW,
+ };
+
+iotlb_fail:
+ return (IOMMUTLBEntry) {0};
+}
+
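+/*
+ * Illustration of the offset conversion above (illustrative values
+ * only): if the translated section is a region mapped at address-space
+ * offset 0x10000 with offset_within_region 0, then a region-relative
+ * xlat of 0x234 becomes the address-space offset 0x10234, which is
+ * then page-aligned via ~page_mask to form the returned IOTLB entry.
+ */
+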
+/* Called from RCU critical section */
+MemoryRegion *flatview_translate(FlatView *fv, hwaddr addr, hwaddr *xlat,
+ hwaddr *plen, bool is_write,
+ MemTxAttrs attrs)
+{
+ MemoryRegion *mr;
+ MemoryRegionSection section;
+ AddressSpace *as = NULL;
+
+ /* This can be MMIO, so setup MMIO bit. */
+ section = flatview_do_translate(fv, addr, xlat, plen, NULL,
+ is_write, true, &as, attrs);
+ mr = section.mr;
+
+ if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
+ hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
+ *plen = MIN(page, *plen);
+ }
+
+ return mr;
+}
+
+typedef struct TCGIOMMUNotifier {
+ IOMMUNotifier n;
+ MemoryRegion *mr;
+ CPUState *cpu;
+ int iommu_idx;
+ bool active;
+} TCGIOMMUNotifier;
+
+static void tcg_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
+{
+ TCGIOMMUNotifier *notifier = container_of(n, TCGIOMMUNotifier, n);
+
+ if (!notifier->active) {
+ return;
+ }
+ tlb_flush(notifier->cpu);
+ notifier->active = false;
+ /* We leave the notifier struct on the list to avoid reallocating it later.
+ * Generally the number of IOMMUs a CPU deals with will be small.
+ * In any case we can't unregister the iommu notifier from a notify
+ * callback.
+ */
+}
+
+static void tcg_register_iommu_notifier(CPUState *cpu,
+ IOMMUMemoryRegion *iommu_mr,
+ int iommu_idx)
+{
+ /* Make sure this CPU has an IOMMU notifier registered for this
+ * IOMMU/IOMMU index combination, so that we can flush its TLB
+ * when the IOMMU tells us the mappings we've cached have changed.
+ */
+ MemoryRegion *mr = MEMORY_REGION(iommu_mr);
+ TCGIOMMUNotifier *notifier = NULL;
+ int i;
+
+ for (i = 0; i < cpu->iommu_notifiers->len; i++) {
+ notifier = g_array_index(cpu->iommu_notifiers, TCGIOMMUNotifier *, i);
+ if (notifier->mr == mr && notifier->iommu_idx == iommu_idx) {
+ break;
+ }
+ }
+ if (i == cpu->iommu_notifiers->len) {
+ /* Not found, add a new entry at the end of the array */
+ cpu->iommu_notifiers = g_array_set_size(cpu->iommu_notifiers, i + 1);
+ notifier = g_new0(TCGIOMMUNotifier, 1);
+ g_array_index(cpu->iommu_notifiers, TCGIOMMUNotifier *, i) = notifier;
+
+ notifier->mr = mr;
+ notifier->iommu_idx = iommu_idx;
+ notifier->cpu = cpu;
+ /* Rather than trying to register interest in the specific part
+ * of the iommu's address space that we've accessed and then
+ * expand it later as subsequent accesses touch more of it, we
+ * just register interest in the whole thing, on the assumption
+ * that iommu reconfiguration will be rare.
+ */
+ iommu_notifier_init(&notifier->n,
+ tcg_iommu_unmap_notify,
+ IOMMU_NOTIFIER_UNMAP,
+ 0,
+ HWADDR_MAX,
+ iommu_idx);
+ memory_region_register_iommu_notifier(notifier->mr, &notifier->n,
+ &error_fatal);
+ }
+
+ if (!notifier->active) {
+ notifier->active = true;
+ }
+}
+
+void tcg_iommu_free_notifier_list(CPUState *cpu)
+{
+ /* Destroy the CPU's notifier list */
+ int i;
+ TCGIOMMUNotifier *notifier;
+
+ for (i = 0; i < cpu->iommu_notifiers->len; i++) {
+ notifier = g_array_index(cpu->iommu_notifiers, TCGIOMMUNotifier *, i);
+ memory_region_unregister_iommu_notifier(notifier->mr, &notifier->n);
+ g_free(notifier);
+ }
+ g_array_free(cpu->iommu_notifiers, true);
+}
+
+void tcg_iommu_init_notifier_list(CPUState *cpu)
+{
+ cpu->iommu_notifiers = g_array_new(false, true, sizeof(TCGIOMMUNotifier *));
+}
+
+/* Called from RCU critical section */
+MemoryRegionSection *
+address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr orig_addr,
+ hwaddr *xlat, hwaddr *plen,
+ MemTxAttrs attrs, int *prot)
+{
+ MemoryRegionSection *section;
+ IOMMUMemoryRegion *iommu_mr;
+ IOMMUMemoryRegionClass *imrc;
+ IOMMUTLBEntry iotlb;
+ int iommu_idx;
+ hwaddr addr = orig_addr;
+ AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
+
+ for (;;) {
+ section = address_space_translate_internal(d, addr, &addr, plen, false);
+
+ iommu_mr = memory_region_get_iommu(section->mr);
+ if (!iommu_mr) {
+ break;
+ }
+
+ imrc = memory_region_get_iommu_class_nocheck(iommu_mr);
+
+ iommu_idx = imrc->attrs_to_index(iommu_mr, attrs);
+ tcg_register_iommu_notifier(cpu, iommu_mr, iommu_idx);
+ /* We need all the permissions, so pass IOMMU_NONE so the IOMMU
+ * doesn't short-cut its translation table walk.
+ */
+ iotlb = imrc->translate(iommu_mr, addr, IOMMU_NONE, iommu_idx);
+ addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
+ | (addr & iotlb.addr_mask));
+ /* Update the caller's prot bits to remove permissions the IOMMU
+ * is giving us a failure response for. If we get down to no
+ * permissions left at all we can give up now.
+ */
+ if (!(iotlb.perm & IOMMU_RO)) {
+ *prot &= ~(PAGE_READ | PAGE_EXEC);
+ }
+ if (!(iotlb.perm & IOMMU_WO)) {
+ *prot &= ~PAGE_WRITE;
+ }
+
+ if (!*prot) {
+ goto translate_fail;
+ }
+
+ d = flatview_to_dispatch(address_space_to_flatview(iotlb.target_as));
+ }
+
+ assert(!memory_region_is_iommu(section->mr));
+ *xlat = addr;
+ return section;
+
+translate_fail:
+ /*
+ * We should be given a page-aligned address -- certainly
+ * tlb_set_page_with_attrs() does so. The page offset of xlat
+ * is used to index sections[], and PHYS_SECTION_UNASSIGNED = 0.
+ * The page portion of xlat will be logged by memory_region_access_valid()
+ * when this memory access is rejected, so use the original untranslated
+ * physical address.
+ */
+ assert((orig_addr & ~TARGET_PAGE_MASK) == 0);
+ *xlat = orig_addr;
+ return &d->map.sections[PHYS_SECTION_UNASSIGNED];
+}
+
+void cpu_address_space_init(CPUState *cpu, int asidx,
+ const char *prefix, MemoryRegion *mr)
+{
+ CPUAddressSpace *newas;
+ AddressSpace *as = g_new0(AddressSpace, 1);
+ char *as_name;
+
+ assert(mr);
+ as_name = g_strdup_printf("%s-%d", prefix, cpu->cpu_index);
+ address_space_init(as, mr, as_name);
+ g_free(as_name);
+
+ /* Target code should have set num_ases before calling us */
+ assert(asidx < cpu->num_ases);
+
+ if (asidx == 0) {
+ /* address space 0 gets the convenience alias */
+ cpu->as = as;
+ }
+
+ /* KVM cannot currently support multiple address spaces. */
+ assert(asidx == 0 || !kvm_enabled());
+
+ if (!cpu->cpu_ases) {
+ cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
+ }
+
+ newas = &cpu->cpu_ases[asidx];
+ newas->cpu = cpu;
+ newas->as = as;
+ if (tcg_enabled()) {
+ newas->tcg_as_listener.log_global_after_sync = tcg_log_global_after_sync;
+ newas->tcg_as_listener.commit = tcg_commit;
+ newas->tcg_as_listener.name = "tcg";
+ memory_listener_register(&newas->tcg_as_listener, as);
+ }
+}
+
+AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
+{
+ /* Return the AddressSpace corresponding to the specified index */
+ return cpu->cpu_ases[asidx].as;
+}
+
+/* Called from RCU critical section */
+static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
+{
+ RAMBlock *block;
+
+ block = qatomic_rcu_read(&ram_list.mru_block);
+ if (block && addr - block->offset < block->max_length) {
+ return block;
+ }
+ RAMBLOCK_FOREACH(block) {
+ if (addr - block->offset < block->max_length) {
+ goto found;
+ }
+ }
+
+ fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
+ abort();
+
+found:
+ /* It is safe to write mru_block outside the iothread lock. This
+ * is what happens:
+ *
+ * mru_block = xxx
+ * rcu_read_unlock()
+ * xxx removed from list
+ * rcu_read_lock()
+ * read mru_block
+ * mru_block = NULL;
+ * call_rcu(reclaim_ramblock, xxx);
+ * rcu_read_unlock()
+ *
+ * qatomic_rcu_set is not needed here. The block was already published
+ * when it was placed into the list. Here we're just making an extra
+ * copy of the pointer.
+ */
+ ram_list.mru_block = block;
+ return block;
+}
+
+static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
+{
+ CPUState *cpu;
+ ram_addr_t start1;
+ RAMBlock *block;
+ ram_addr_t end;
+
+ assert(tcg_enabled());
+ end = TARGET_PAGE_ALIGN(start + length);
+ start &= TARGET_PAGE_MASK;
+
+ RCU_READ_LOCK_GUARD();
+ block = qemu_get_ram_block(start);
+ assert(block == qemu_get_ram_block(end - 1));
+ start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
+ CPU_FOREACH(cpu) {
+ tlb_reset_dirty(cpu, start1, length);
+ }
+}
+
+/* Note: start and end must be within the same ram block. */
+bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
+ ram_addr_t length,
+ unsigned client)
+{
+ DirtyMemoryBlocks *blocks;
+ unsigned long end, page, start_page;
+ bool dirty = false;
+ RAMBlock *ramblock;
+ uint64_t mr_offset, mr_size;
+
+ if (length == 0) {
+ return false;
+ }
+
+ end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
+ start_page = start >> TARGET_PAGE_BITS;
+ page = start_page;
+
+ WITH_RCU_READ_LOCK_GUARD() {
+ blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);
+ ramblock = qemu_get_ram_block(start);
+ /* Range sanity check on the ramblock */
+ assert(start >= ramblock->offset &&
+ start + length <= ramblock->offset + ramblock->used_length);
+
+ while (page < end) {
+ unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
+ unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
+ unsigned long num = MIN(end - page,
+ DIRTY_MEMORY_BLOCK_SIZE - offset);
+
+ dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
+ offset, num);
+ page += num;
+ }
+
+ mr_offset = (ram_addr_t)(start_page << TARGET_PAGE_BITS) - ramblock->offset;
+ mr_size = (end - start_page) << TARGET_PAGE_BITS;
+ memory_region_clear_dirty_bitmap(ramblock->mr, mr_offset, mr_size);
+ }
+
+ if (dirty && tcg_enabled()) {
+ tlb_reset_dirty_range_all(start, length);
+ }
+
+ return dirty;
+}
+
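+/*
+ * Sketch of the dirty-bitmap indexing used above, with an
+ * artificially small DIRTY_MEMORY_BLOCK_SIZE of 8 pages chosen purely
+ * for illustration: page 19 lives in blocks->blocks[19 / 8 == 2] at
+ * bit offset 19 % 8 == 3, and a range crossing a block boundary is
+ * processed in per-block chunks of at most
+ * DIRTY_MEMORY_BLOCK_SIZE - offset bits per iteration.
+ */
+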
+DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
+ (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client)
+{
+ DirtyMemoryBlocks *blocks;
+ ram_addr_t start = memory_region_get_ram_addr(mr) + offset;
+ unsigned long align = 1UL << (TARGET_PAGE_BITS + BITS_PER_LEVEL);
+ ram_addr_t first = QEMU_ALIGN_DOWN(start, align);
+ ram_addr_t last = QEMU_ALIGN_UP(start + length, align);
+ DirtyBitmapSnapshot *snap;
+ unsigned long page, end, dest;
+
+ snap = g_malloc0(sizeof(*snap) +
+ ((last - first) >> (TARGET_PAGE_BITS + 3)));
+ snap->start = first;
+ snap->end = last;
+
+ page = first >> TARGET_PAGE_BITS;
+ end = last >> TARGET_PAGE_BITS;
+ dest = 0;
+
+ WITH_RCU_READ_LOCK_GUARD() {
+ blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);
+
+ while (page < end) {
+ unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
+ unsigned long ofs = page % DIRTY_MEMORY_BLOCK_SIZE;
+ unsigned long num = MIN(end - page,
+ DIRTY_MEMORY_BLOCK_SIZE - ofs);
+
+ assert(QEMU_IS_ALIGNED(ofs, (1 << BITS_PER_LEVEL)));
+ assert(QEMU_IS_ALIGNED(num, (1 << BITS_PER_LEVEL)));
+ ofs >>= BITS_PER_LEVEL;
+
+ bitmap_copy_and_clear_atomic(snap->dirty + dest,
+ blocks->blocks[idx] + ofs,
+ num);
+ page += num;
+ dest += num >> BITS_PER_LEVEL;
+ }
+ }
+
+ if (tcg_enabled()) {
+ tlb_reset_dirty_range_all(start, length);
+ }
+
+ memory_region_clear_dirty_bitmap(mr, offset, length);
+
+ return snap;
+}
+
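+/*
+ * Size check for the snapshot allocation above (assuming 4 KiB target
+ * pages, i.e. TARGET_PAGE_BITS == 12): snapshotting a 1 MiB range
+ * covers 256 pages, one bit each, so
+ * (last - first) >> (TARGET_PAGE_BITS + 3) == 0x100000 >> 15 == 32
+ * bytes of bitmap, plus the header.
+ */
+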
+bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
+ ram_addr_t start,
+ ram_addr_t length)
+{
+ unsigned long page, end;
+
+ assert(start >= snap->start);
+ assert(start + length <= snap->end);
+
+ end = TARGET_PAGE_ALIGN(start + length - snap->start) >> TARGET_PAGE_BITS;
+ page = (start - snap->start) >> TARGET_PAGE_BITS;
+
+ while (page < end) {
+ if (test_bit(page, snap->dirty)) {
+ return true;
+ }
+ page++;
+ }
+ return false;
+}
+
+/* Called from RCU critical section */
+hwaddr memory_region_section_get_iotlb(CPUState *cpu,
+ MemoryRegionSection *section)
+{
+ AddressSpaceDispatch *d = flatview_to_dispatch(section->fv);
+ return section - d->map.sections;
+}
+
+static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end,
+ uint16_t section);
+static subpage_t *subpage_init(FlatView *fv, hwaddr base);
+
+static uint16_t phys_section_add(PhysPageMap *map,
+ MemoryRegionSection *section)
+{
+ /* The physical section number is ORed with a page-aligned
+ * pointer to produce the iotlb entries. Thus it should
+ * never overflow into the page-aligned value.
+ */
+ assert(map->sections_nb < TARGET_PAGE_SIZE);
+
+ if (map->sections_nb == map->sections_nb_alloc) {
+ map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
+ map->sections = g_renew(MemoryRegionSection, map->sections,
+ map->sections_nb_alloc);
+ }
+ map->sections[map->sections_nb] = *section;
+ memory_region_ref(section->mr);
+ return map->sections_nb++;
+}
+
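+/*
+ * The encoding the assert above protects (assuming 4 KiB target
+ * pages): an IOTLB value is a page-aligned address ORed with a
+ * section index, e.g. 0x40000000 | 42, so at most TARGET_PAGE_SIZE
+ * (4096) sections fit and the index is recovered with
+ * index & ~TARGET_PAGE_MASK (see iotlb_to_section() below).
+ */
+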
+static void phys_section_destroy(MemoryRegion *mr)
+{
+ bool have_sub_page = mr->subpage;
+
+ memory_region_unref(mr);
+
+ if (have_sub_page) {
+ subpage_t *subpage = container_of(mr, subpage_t, iomem);
+ object_unref(OBJECT(&subpage->iomem));
+ g_free(subpage);
+ }
+}
+
+static void phys_sections_free(PhysPageMap *map)
+{
+ while (map->sections_nb > 0) {
+ MemoryRegionSection *section = &map->sections[--map->sections_nb];
+ phys_section_destroy(section->mr);
+ }
+ g_free(map->sections);
+ g_free(map->nodes);
+}
+
+static void register_subpage(FlatView *fv, MemoryRegionSection *section)
+{
+ AddressSpaceDispatch *d = flatview_to_dispatch(fv);
+ subpage_t *subpage;
+ hwaddr base = section->offset_within_address_space
+ & TARGET_PAGE_MASK;
+ MemoryRegionSection *existing = phys_page_find(d, base);
+ MemoryRegionSection subsection = {
+ .offset_within_address_space = base,
+ .size = int128_make64(TARGET_PAGE_SIZE),
+ };
+ hwaddr start, end;
+
+ assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
+
+ if (!(existing->mr->subpage)) {
+ subpage = subpage_init(fv, base);
+ subsection.fv = fv;
+ subsection.mr = &subpage->iomem;
+ phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
+ phys_section_add(&d->map, &subsection));
+ } else {
+ subpage = container_of(existing->mr, subpage_t, iomem);
+ }
+ start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
+ end = start + int128_get64(section->size) - 1;
+ subpage_register(subpage, start, end,
+ phys_section_add(&d->map, section));
+}
+
+
+static void register_multipage(FlatView *fv,
+ MemoryRegionSection *section)
+{
+ AddressSpaceDispatch *d = flatview_to_dispatch(fv);
+ hwaddr start_addr = section->offset_within_address_space;
+ uint16_t section_index = phys_section_add(&d->map, section);
+ uint64_t num_pages = int128_get64(int128_rshift(section->size,
+ TARGET_PAGE_BITS));
+
+ assert(num_pages);
+ phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
+}
+
+/*
+ * The range in *section* may look like this:
+ *
+ * |s|PPPPPPP|s|
+ *
+ * where s stands for subpage and P for page.
+ */
+void flatview_add_to_dispatch(FlatView *fv, MemoryRegionSection *section)
+{
+ MemoryRegionSection remain = *section;
+ Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
+
+ /* register first subpage */
+ if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
+ uint64_t left = TARGET_PAGE_ALIGN(remain.offset_within_address_space)
+ - remain.offset_within_address_space;
+
+ MemoryRegionSection now = remain;
+ now.size = int128_min(int128_make64(left), now.size);
+ register_subpage(fv, &now);
+ if (int128_eq(remain.size, now.size)) {
+ return;
+ }
+ remain.size = int128_sub(remain.size, now.size);
+ remain.offset_within_address_space += int128_get64(now.size);
+ remain.offset_within_region += int128_get64(now.size);
+ }
+
+ /* register whole pages */
+ if (int128_ge(remain.size, page_size)) {
+ MemoryRegionSection now = remain;
+ now.size = int128_and(now.size, int128_neg(page_size));
+ register_multipage(fv, &now);
+ if (int128_eq(remain.size, now.size)) {
+ return;
+ }
+ remain.size = int128_sub(remain.size, now.size);
+ remain.offset_within_address_space += int128_get64(now.size);
+ remain.offset_within_region += int128_get64(now.size);
+ }
+
+ /* register last subpage */
+ register_subpage(fv, &remain);
+}
+
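+/*
+ * Worked example of the split above (assuming 4 KiB target pages): a
+ * section at address-space offset 0x800 with size 0x3000 is
+ * registered as a first subpage [0x800, 0xfff], two whole pages
+ * [0x1000, 0x2fff], and a last subpage [0x3000, 0x37ff].
+ */
+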
+void qemu_flush_coalesced_mmio_buffer(void)
+{
+ if (kvm_enabled()) {
+ kvm_flush_coalesced_mmio_buffer();
+ }
+}
+
+void qemu_mutex_lock_ramlist(void)
+{
+ qemu_mutex_lock(&ram_list.mutex);
+}
+
+void qemu_mutex_unlock_ramlist(void)
+{
+ qemu_mutex_unlock(&ram_list.mutex);
+}
+
+GString *ram_block_format(void)
+{
+ RAMBlock *block;
+ char *psize;
+ GString *buf = g_string_new("");
+
+ RCU_READ_LOCK_GUARD();
+ g_string_append_printf(buf, "%24s %8s %18s %18s %18s %18s %3s\n",
+ "Block Name", "PSize", "Offset", "Used", "Total",
+ "HVA", "RO");
+
+ RAMBLOCK_FOREACH(block) {
+ psize = size_to_str(block->page_size);
+ g_string_append_printf(buf, "%24s %8s 0x%016" PRIx64 " 0x%016" PRIx64
+ " 0x%016" PRIx64 " 0x%016" PRIx64 " %3s\n",
+ block->idstr, psize,
+ (uint64_t)block->offset,
+ (uint64_t)block->used_length,
+ (uint64_t)block->max_length,
+ (uint64_t)(uintptr_t)block->host,
+ block->mr->readonly ? "ro" : "rw");
+
+ g_free(psize);
+ }
+
+ return buf;
+}
+
+static int find_min_backend_pagesize(Object *obj, void *opaque)
+{
+ long *hpsize_min = opaque;
+
+ if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
+ HostMemoryBackend *backend = MEMORY_BACKEND(obj);
+ long hpsize = host_memory_backend_pagesize(backend);
+
+ if (host_memory_backend_is_mapped(backend) && (hpsize < *hpsize_min)) {
+ *hpsize_min = hpsize;
+ }
+ }
+
+ return 0;
+}
+
+static int find_max_backend_pagesize(Object *obj, void *opaque)
+{
+ long *hpsize_max = opaque;
+
+ if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
+ HostMemoryBackend *backend = MEMORY_BACKEND(obj);
+ long hpsize = host_memory_backend_pagesize(backend);
+
+ if (host_memory_backend_is_mapped(backend) && (hpsize > *hpsize_max)) {
+ *hpsize_max = hpsize;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * TODO: We assume right now that all mapped host memory backends are
+ * used as RAM, however some might be used for different purposes.
+ */
+long qemu_minrampagesize(void)
+{
+ long hpsize = LONG_MAX;
+ Object *memdev_root = object_resolve_path("/objects", NULL);
+
+ object_child_foreach(memdev_root, find_min_backend_pagesize, &hpsize);
+ return hpsize;
+}
+
+long qemu_maxrampagesize(void)
+{
+ long pagesize = 0;
+ Object *memdev_root = object_resolve_path("/objects", NULL);
+
+ object_child_foreach(memdev_root, find_max_backend_pagesize, &pagesize);
+ return pagesize;
+}
+
+#ifdef CONFIG_POSIX
+static int64_t get_file_size(int fd)
+{
+ int64_t size;
+#if defined(__linux__)
+ struct stat st;
+
+ if (fstat(fd, &st) < 0) {
+ return -errno;
+ }
+
+ /* Special handling for devdax character devices */
+ if (S_ISCHR(st.st_mode)) {
+ g_autofree char *subsystem_path = NULL;
+ g_autofree char *subsystem = NULL;
+
+ subsystem_path = g_strdup_printf("/sys/dev/char/%d:%d/subsystem",
+ major(st.st_rdev), minor(st.st_rdev));
+ subsystem = g_file_read_link(subsystem_path, NULL);
+
+ if (subsystem && g_str_has_suffix(subsystem, "/dax")) {
+ g_autofree char *size_path = NULL;
+ g_autofree char *size_str = NULL;
+
+ size_path = g_strdup_printf("/sys/dev/char/%d:%d/size",
+ major(st.st_rdev), minor(st.st_rdev));
+
+ if (g_file_get_contents(size_path, &size_str, NULL, NULL)) {
+ return g_ascii_strtoll(size_str, NULL, 0);
+ }
+ }
+ }
+#endif /* defined(__linux__) */
+
+ /* st.st_size may be zero for special files, yet lseek(2) works */
+ size = lseek(fd, 0, SEEK_END);
+ if (size < 0) {
+ return -errno;
+ }
+ return size;
+}
+
+static int64_t get_file_align(int fd)
+{
+ int64_t align = -1;
+#if defined(__linux__) && defined(CONFIG_LIBDAXCTL)
+ struct stat st;
+
+ if (fstat(fd, &st) < 0) {
+ return -errno;
+ }
+
+ /* Special handling for devdax character devices */
+ if (S_ISCHR(st.st_mode)) {
+ g_autofree char *path = NULL;
+ g_autofree char *rpath = NULL;
+ struct daxctl_ctx *ctx;
+ struct daxctl_region *region;
+ int rc = 0;
+
+ path = g_strdup_printf("/sys/dev/char/%d:%d",
+ major(st.st_rdev), minor(st.st_rdev));
+ rpath = realpath(path, NULL);
+ if (!rpath) {
+ return -errno;
+ }
+
+ rc = daxctl_new(&ctx);
+ if (rc) {
+ return -1;
+ }
+
+ daxctl_region_foreach(ctx, region) {
+ if (strstr(rpath, daxctl_region_get_path(region))) {
+ align = daxctl_region_get_align(region);
+ break;
+ }
+ }
+ daxctl_unref(ctx);
+ }
+#endif /* defined(__linux__) && defined(CONFIG_LIBDAXCTL) */
+
+ return align;
+}
+
+static int file_ram_open(const char *path,
+ const char *region_name,
+ bool readonly,
+ bool *created)
+{
+ char *filename;
+ char *sanitized_name;
+ char *c;
+ int fd = -1;
+
+ *created = false;
+ for (;;) {
+ fd = open(path, readonly ? O_RDONLY : O_RDWR);
+ if (fd >= 0) {
+ /*
+ * open(O_RDONLY) won't fail with EISDIR. Check manually if we
+ * opened a directory and fail similarly to how we fail ENOENT
+ * in readonly mode. Note that mkstemp() would imply O_RDWR.
+ */
+ if (readonly) {
+ struct stat file_stat;
+
+ if (fstat(fd, &file_stat)) {
+ close(fd);
+ if (errno == EINTR) {
+ continue;
+ }
+ return -errno;
+ } else if (S_ISDIR(file_stat.st_mode)) {
+ close(fd);
+ return -EISDIR;
+ }
+ }
+ /* @path names an existing file, use it */
+ break;
+ }
+ if (errno == ENOENT) {
+ if (readonly) {
+ /* Refuse to create new, readonly files. */
+ return -ENOENT;
+ }
+ /* @path names a file that doesn't exist, create it */
+ fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
+ if (fd >= 0) {
+ *created = true;
+ break;
+ }
+ } else if (errno == EISDIR) {
+ /* @path names a directory, create a file there */
+ /* Make name safe to use with mkstemp by replacing '/' with '_'. */
+ sanitized_name = g_strdup(region_name);
+ for (c = sanitized_name; *c != '\0'; c++) {
+ if (*c == '/') {
+ *c = '_';
+ }
+ }
+
+ filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
+ sanitized_name);
+ g_free(sanitized_name);
+
+ fd = mkstemp(filename);
+ if (fd >= 0) {
+ unlink(filename);
+ g_free(filename);
+ break;
+ }
+ g_free(filename);
+ }
+ if (errno != EEXIST && errno != EINTR) {
+ return -errno;
+ }
+ /*
+ * Try again on EINTR and EEXIST. The latter happens when
+ * something else creates the file between our two open().
+ */
+ }
+
+ return fd;
+}
+
+static void *file_ram_alloc(RAMBlock *block,
+ ram_addr_t memory,
+ int fd,
+ bool truncate,
+ off_t offset,
+ Error **errp)
+{
+ uint32_t qemu_map_flags;
+ void *area;
+
+ block->page_size = qemu_fd_getpagesize(fd);
+ if (block->mr->align % block->page_size) {
+ error_setg(errp, "alignment 0x%" PRIx64
+ " must be multiples of page size 0x%zx",
+ block->mr->align, block->page_size);
+ return NULL;
+ } else if (block->mr->align && !is_power_of_2(block->mr->align)) {
+ error_setg(errp, "alignment 0x%" PRIx64
+ " must be a power of two", block->mr->align);
+ return NULL;
+ } else if (offset % block->page_size) {
+ error_setg(errp, "offset 0x%" PRIx64
+ " must be multiples of page size 0x%zx",
+ offset, block->page_size);
+ return NULL;
+ }
+ block->mr->align = MAX(block->page_size, block->mr->align);
+#if defined(__s390x__)
+ if (kvm_enabled()) {
+ block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN);
+ }
+#endif
+
+ if (memory < block->page_size) {
+ error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
+ "or larger than page size 0x%zx",
+ memory, block->page_size);
+ return NULL;
+ }
+
+ memory = ROUND_UP(memory, block->page_size);
+
+ /*
+ * ftruncate is not supported by hugetlbfs on older
+ * hosts, so don't bother bailing out on errors.
+ * If anything goes wrong with it under other filesystems,
+ * mmap will fail.
+ *
+ * Do not truncate the non-empty backend file to avoid corrupting
+ * the existing data in the file. Disabling shrinking is not
+ * enough. For example, the current vNVDIMM implementation stores
+ * the guest NVDIMM labels at the end of the backend file. If the
+ * backend file is later extended, QEMU will not be able to find
+ * those labels. Therefore, extending the non-empty backend file
+ * is disabled as well.
+ */
+ if (truncate && ftruncate(fd, offset + memory)) {
+ perror("ftruncate");
+ }
+
+ qemu_map_flags = (block->flags & RAM_READONLY) ? QEMU_MAP_READONLY : 0;
+ qemu_map_flags |= (block->flags & RAM_SHARED) ? QEMU_MAP_SHARED : 0;
+ qemu_map_flags |= (block->flags & RAM_PMEM) ? QEMU_MAP_SYNC : 0;
+ qemu_map_flags |= (block->flags & RAM_NORESERVE) ? QEMU_MAP_NORESERVE : 0;
+ area = qemu_ram_mmap(fd, memory, block->mr->align, qemu_map_flags, offset);
+ if (area == MAP_FAILED) {
+ error_setg_errno(errp, errno,
+ "unable to map backing store for guest RAM");
+ return NULL;
+ }
+
+ block->fd = fd;
+ block->fd_offset = offset;
+ return area;
+}
+#endif
+
+/* Allocate space within the ram_addr_t space that governs the
+ * dirty bitmaps.
+ * Called with the ramlist lock held.
+ */
+static ram_addr_t find_ram_offset(ram_addr_t size)
+{
+ RAMBlock *block, *next_block;
+ ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
+
+ assert(size != 0); /* it would hand out the same offset multiple times */
+
+ if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
+ return 0;
+ }
+
+ RAMBLOCK_FOREACH(block) {
+ ram_addr_t candidate, next = RAM_ADDR_MAX;
+
+ /* Align blocks to start on a 'long' in the bitmap,
+ * which makes bitmap syncing take the fast path.
+ */
+ candidate = block->offset + block->max_length;
+ candidate = ROUND_UP(candidate, BITS_PER_LONG << TARGET_PAGE_BITS);
+
+ /* Search for the closest following block
+ * and find the gap.
+ */
+ RAMBLOCK_FOREACH(next_block) {
+ if (next_block->offset >= candidate) {
+ next = MIN(next, next_block->offset);
+ }
+ }
+
+ /* If it fits, remember our place and the size of the gap,
+ * but keep going in case we find a smaller gap later,
+ * thus avoiding fragmentation.
+ */
+ if (next - candidate >= size && next - candidate < mingap) {
+ offset = candidate;
+ mingap = next - candidate;
+ }
+
+ trace_find_ram_offset_loop(size, candidate, offset, next, mingap);
+ }
+
+ if (offset == RAM_ADDR_MAX) {
+ fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
+ (uint64_t)size);
+ abort();
+ }
+
+ trace_find_ram_offset(size, offset);
+
+ return offset;
+}
+
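+/*
+ * Best-fit illustration for find_ram_offset() (illustrative offsets
+ * only): with existing blocks at [0, 0x100000) and [0x500000,
+ * 0x600000), a request for 0x80000 bytes considers the candidates
+ * 0x100000 (gap 0x400000) and 0x600000 (unbounded gap) and picks
+ * 0x100000, the smallest gap that still fits, limiting fragmentation.
+ */
+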
+static unsigned long last_ram_page(void)
+{
+ RAMBlock *block;
+ ram_addr_t last = 0;
+
+ RCU_READ_LOCK_GUARD();
+ RAMBLOCK_FOREACH(block) {
+ last = MAX(last, block->offset + block->max_length);
+ }
+ return last >> TARGET_PAGE_BITS;
+}
+
+static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
+{
+ int ret;
+
+ /* Use MADV_DONTDUMP if the user doesn't want guest memory in core dumps */
+ if (!machine_dump_guest_core(current_machine)) {
+ ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
+ if (ret) {
+ perror("qemu_madvise");
+ fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
+ "but dump_guest_core=off specified\n");
+ }
+ }
+}
+
+const char *qemu_ram_get_idstr(RAMBlock *rb)
+{
+ return rb->idstr;
+}
+
+void *qemu_ram_get_host_addr(RAMBlock *rb)
+{
+ return rb->host;
+}
+
+ram_addr_t qemu_ram_get_offset(RAMBlock *rb)
+{
+ return rb->offset;
+}
+
+ram_addr_t qemu_ram_get_used_length(RAMBlock *rb)
+{
+ return rb->used_length;
+}
+
+ram_addr_t qemu_ram_get_max_length(RAMBlock *rb)
+{
+ return rb->max_length;
+}
+
+bool qemu_ram_is_shared(RAMBlock *rb)
+{
+ return rb->flags & RAM_SHARED;
+}
+
+bool qemu_ram_is_noreserve(RAMBlock *rb)
+{
+ return rb->flags & RAM_NORESERVE;
+}
+
+/* Note: Only set at the start of postcopy */
+bool qemu_ram_is_uf_zeroable(RAMBlock *rb)
+{
+ return rb->flags & RAM_UF_ZEROPAGE;
+}
+
+void qemu_ram_set_uf_zeroable(RAMBlock *rb)
+{
+ rb->flags |= RAM_UF_ZEROPAGE;
+}
+
+bool qemu_ram_is_migratable(RAMBlock *rb)
+{
+ return rb->flags & RAM_MIGRATABLE;
+}
+
+void qemu_ram_set_migratable(RAMBlock *rb)
+{
+ rb->flags |= RAM_MIGRATABLE;
+}
+
+void qemu_ram_unset_migratable(RAMBlock *rb)
+{
+ rb->flags &= ~RAM_MIGRATABLE;
+}
+
+bool qemu_ram_is_named_file(RAMBlock *rb)
+{
+ return rb->flags & RAM_NAMED_FILE;
+}
+
+int qemu_ram_get_fd(RAMBlock *rb)
+{
+ return rb->fd;
+}
+
+/* Called with iothread lock held. */
+void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
+{
+ RAMBlock *block;
+
+ assert(new_block);
+ assert(!new_block->idstr[0]);
+
+ if (dev) {
+ char *id = qdev_get_dev_path(dev);
+ if (id) {
+ snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
+ g_free(id);
+ }
+ }
+ pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
+
+ RCU_READ_LOCK_GUARD();
+ RAMBLOCK_FOREACH(block) {
+ if (block != new_block &&
+ !strcmp(block->idstr, new_block->idstr)) {
+ fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
+ new_block->idstr);
+ abort();
+ }
+ }
+}
+
+/* Called with iothread lock held. */
+void qemu_ram_unset_idstr(RAMBlock *block)
+{
+ /* FIXME: arch_init.c assumes that this is not called throughout
+ * migration. Ignore the problem since hot-unplug during migration
+ * does not work anyway.
+ */
+ if (block) {
+ memset(block->idstr, 0, sizeof(block->idstr));
+ }
+}
+
+size_t qemu_ram_pagesize(RAMBlock *rb)
+{
+ return rb->page_size;
+}
+
+/* Returns the largest size of page in use */
+size_t qemu_ram_pagesize_largest(void)
+{
+ RAMBlock *block;
+ size_t largest = 0;
+
+ RAMBLOCK_FOREACH(block) {
+ largest = MAX(largest, qemu_ram_pagesize(block));
+ }
+
+ return largest;
+}
+
+static int memory_try_enable_merging(void *addr, size_t len)
+{
+ if (!machine_mem_merge(current_machine)) {
+ /* disabled by the user */
+ return 0;
+ }
+
+ return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
+}
+
+/*
+ * Resizing RAM while migrating can result in the migration being canceled.
+ * Care has to be taken if the guest might have already detected the memory.
+ *
+ * As the memory core doesn't know how the memory is accessed, it is up
+ * to the resize callback to update the device state and/or add assertions
+ * to detect misuse, if necessary.
+ */
+int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
+{
+ const ram_addr_t oldsize = block->used_length;
+ const ram_addr_t unaligned_size = newsize;
+
+ assert(block);
+
+ newsize = HOST_PAGE_ALIGN(newsize);
+
+ if (block->used_length == newsize) {
+ /*
+ * We don't have to resize the ram block (which only knows aligned
+ * sizes), however, we have to notify if the unaligned size changed.
+ */
+ if (unaligned_size != memory_region_size(block->mr)) {
+ memory_region_set_size(block->mr, unaligned_size);
+ if (block->resized) {
+ block->resized(block->idstr, unaligned_size, block->host);
+ }
+ }
+ return 0;
+ }
+
+ if (!(block->flags & RAM_RESIZEABLE)) {
+ error_setg_errno(errp, EINVAL,
+ "Size mismatch: %s: 0x" RAM_ADDR_FMT
+ " != 0x" RAM_ADDR_FMT, block->idstr,
+ newsize, block->used_length);
+ return -EINVAL;
+ }
+
+ if (block->max_length < newsize) {
+ error_setg_errno(errp, EINVAL,
+ "Size too large: %s: 0x" RAM_ADDR_FMT
+ " > 0x" RAM_ADDR_FMT, block->idstr,
+ newsize, block->max_length);
+ return -EINVAL;
+ }
+
+ /* Notify before modifying the ram block and touching the bitmaps. */
+ if (block->host) {
+ ram_block_notify_resize(block->host, oldsize, newsize);
+ }
+
+ cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
+ block->used_length = newsize;
+ cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
+ DIRTY_CLIENTS_ALL);
+ memory_region_set_size(block->mr, unaligned_size);
+ if (block->resized) {
+ block->resized(block->idstr, unaligned_size, block->host);
+ }
+ return 0;
+}
+
+/*
+ * Trigger a sync of the given ram block for the range [start, start + length]
+ * with the backing store, if one is available.
+ * Otherwise this is a no-op.
+ * Note: this is expected to be a synchronous operation.
+ */
+void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length)
+{
+ /* The requested range should fit within the block range */
+ g_assert((start + length) <= block->used_length);
+
+#ifdef CONFIG_LIBPMEM
+ /* The lack of support for pmem should not block the sync */
+ if (ramblock_is_pmem(block)) {
+ void *addr = ramblock_ptr(block, start);
+ pmem_persist(addr, length);
+ return;
+ }
+#endif
+ if (block->fd >= 0) {
+ /*
+ * In case there is no PMEM support, or the memory has not been
+ * specified as persistent (or is not persistent), use msync.
+ * Less optimal, but it still achieves the same goal.
+ */
+ void *addr = ramblock_ptr(block, start);
+ if (qemu_msync(addr, length, block->fd)) {
+ warn_report("%s: failed to sync memory range: start: "
+ RAM_ADDR_FMT " length: " RAM_ADDR_FMT,
+ __func__, start, length);
+ }
+ }
+}
+
+/* Called with ram_list.mutex held */
+static void dirty_memory_extend(ram_addr_t old_ram_size,
+ ram_addr_t new_ram_size)
+{
+ ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
+ DIRTY_MEMORY_BLOCK_SIZE);
+ ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
+ DIRTY_MEMORY_BLOCK_SIZE);
+ int i;
+
+ /* Only need to extend if block count increased */
+ if (new_num_blocks <= old_num_blocks) {
+ return;
+ }
+
+ for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
+ DirtyMemoryBlocks *old_blocks;
+ DirtyMemoryBlocks *new_blocks;
+ int j;
+
+ old_blocks = qatomic_rcu_read(&ram_list.dirty_memory[i]);
+ new_blocks = g_malloc(sizeof(*new_blocks) +
+ sizeof(new_blocks->blocks[0]) * new_num_blocks);
+
+ if (old_num_blocks) {
+ memcpy(new_blocks->blocks, old_blocks->blocks,
+ old_num_blocks * sizeof(old_blocks->blocks[0]));
+ }
+
+ for (j = old_num_blocks; j < new_num_blocks; j++) {
+ new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
+ }
+
+ qatomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
+
+ if (old_blocks) {
+ g_free_rcu(old_blocks, rcu);
+ }
+ }
+}
+
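+/*
+ * The extension above follows the usual RCU copy-and-publish pattern:
+ * e.g. growing from 2 to 3 blocks copies the 2 existing bitmap
+ * pointers into a freshly allocated array, appends 1 new zeroed
+ * bitmap, publishes the array with qatomic_rcu_set(), and frees the
+ * old array only after a grace period via g_free_rcu(). Readers
+ * therefore always see a consistent blocks[] array.
+ */
+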
+static void ram_block_add(RAMBlock *new_block, Error **errp)
+{
+ const bool noreserve = qemu_ram_is_noreserve(new_block);
+ const bool shared = qemu_ram_is_shared(new_block);
+ RAMBlock *block;
+ RAMBlock *last_block = NULL;
+ ram_addr_t old_ram_size, new_ram_size;
+ Error *err = NULL;
+
+ old_ram_size = last_ram_page();
+
+ qemu_mutex_lock_ramlist();
+ new_block->offset = find_ram_offset(new_block->max_length);
+
+ if (!new_block->host) {
+ if (xen_enabled()) {
+ xen_ram_alloc(new_block->offset, new_block->max_length,
+ new_block->mr, &err);
+ if (err) {
+ error_propagate(errp, err);
+ qemu_mutex_unlock_ramlist();
+ return;
+ }
+ } else {
+ new_block->host = qemu_anon_ram_alloc(new_block->max_length,
+ &new_block->mr->align,
+ shared, noreserve);
+ if (!new_block->host) {
+ error_setg_errno(errp, errno,
+ "cannot set up guest memory '%s'",
+ memory_region_name(new_block->mr));
+ qemu_mutex_unlock_ramlist();
+ return;
+ }
+ memory_try_enable_merging(new_block->host, new_block->max_length);
+ }
+ }
+
+ new_ram_size = MAX(old_ram_size,
+ (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
+ if (new_ram_size > old_ram_size) {
+ dirty_memory_extend(old_ram_size, new_ram_size);
+ }
+ /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
+ * QLIST (which has an RCU-friendly variant) does not have insertion at
+ * tail, so save the last element in last_block.
+ */
+ RAMBLOCK_FOREACH(block) {
+ last_block = block;
+ if (block->max_length < new_block->max_length) {
+ break;
+ }
+ }
+ if (block) {
+ QLIST_INSERT_BEFORE_RCU(block, new_block, next);
+ } else if (last_block) {
+ QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
+ } else { /* list is empty */
+ QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
+ }
+ ram_list.mru_block = NULL;
+
+ /* Write list before version */
+ smp_wmb();
+ ram_list.version++;
+ qemu_mutex_unlock_ramlist();
+
+ cpu_physical_memory_set_dirty_range(new_block->offset,
+ new_block->used_length,
+ DIRTY_CLIENTS_ALL);
+
+ if (new_block->host) {
+ qemu_ram_setup_dump(new_block->host, new_block->max_length);
+ qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
+ /*
+ * MADV_DONTFORK is also needed by KVM in the absence of a synchronous MMU.
+ * Configure it unless the machine is a qtest server, in which case
+ * KVM is not used and the process may be forked (e.g. for fuzzing purposes).
+ */
+ if (!qtest_enabled()) {
+ qemu_madvise(new_block->host, new_block->max_length,
+ QEMU_MADV_DONTFORK);
+ }
+ ram_block_notify_add(new_block->host, new_block->used_length,
+ new_block->max_length);
+ }
+}
+
+#ifdef CONFIG_POSIX
+RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
+ uint32_t ram_flags, int fd, off_t offset,
+ Error **errp)
+{
+ RAMBlock *new_block;
+ Error *local_err = NULL;
+ int64_t file_size, file_align;
+
+ /* Only these ram flags are supported for now. */
+ assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE |
+ RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY |
+ RAM_READONLY_FD)) == 0);
+
+ if (xen_enabled()) {
+ error_setg(errp, "-mem-path not supported with Xen");
+ return NULL;
+ }
+
+ if (kvm_enabled() && !kvm_has_sync_mmu()) {
+ error_setg(errp,
+ "host lacks kvm mmu notifiers, -mem-path unsupported");
+ return NULL;
+ }
+
+ size = HOST_PAGE_ALIGN(size);
+ file_size = get_file_size(fd);
+ if (file_size > offset && file_size < (offset + size)) {
+ error_setg(errp, "backing store size 0x%" PRIx64
+ " does not match 'size' option 0x" RAM_ADDR_FMT,
+ file_size, size);
+ return NULL;
+ }
+
+ file_align = get_file_align(fd);
+ if (file_align > 0 && file_align > mr->align) {
+ error_setg(errp, "backing store align 0x%" PRIx64
+ " is larger than 'align' option 0x%" PRIx64,
+ file_align, mr->align);
+ return NULL;
+ }
+
+ new_block = g_malloc0(sizeof(*new_block));
+ new_block->mr = mr;
+ new_block->used_length = size;
+ new_block->max_length = size;
+ new_block->flags = ram_flags;
+ new_block->host = file_ram_alloc(new_block, size, fd, !file_size, offset,
+ errp);
+ if (!new_block->host) {
+ g_free(new_block);
+ return NULL;
+ }
+
+ ram_block_add(new_block, &local_err);
+ if (local_err) {
+ g_free(new_block);
+ error_propagate(errp, local_err);
+ return NULL;
+ }
+ return new_block;
+}
+
+RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
+ uint32_t ram_flags, const char *mem_path,
+ off_t offset, Error **errp)
+{
+ int fd;
+ bool created;
+ RAMBlock *block;
+
+ fd = file_ram_open(mem_path, memory_region_name(mr),
+ !!(ram_flags & RAM_READONLY_FD), &created);
+ if (fd < 0) {
+ error_setg_errno(errp, -fd, "can't open backing store %s for guest RAM",
+ mem_path);
+ if (!(ram_flags & RAM_READONLY_FD) && !(ram_flags & RAM_SHARED) &&
+ fd == -EACCES) {
+ /*
+ * If we can open the file R/O (note: will never create a new file)
+ * and we are dealing with a private mapping, there are still ways
+ * to consume such files and get RAM instead of ROM.
+ */
+ fd = file_ram_open(mem_path, memory_region_name(mr), true,
+ &created);
+ if (fd < 0) {
+ return NULL;
+ }
+ assert(!created);
+ close(fd);
+ error_append_hint(errp, "Consider opening the backing store"
+ " read-only but still creating writable RAM using"
+ " '-object memory-backend-file,readonly=on,rom=off...'"
+ " (see \"VM templating\" documentation)\n");
+ }
+ return NULL;
+ }
+
+ block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, offset, errp);
+ if (!block) {
+ if (created) {
+ unlink(mem_path);
+ }
+ close(fd);
+ return NULL;
+ }
+
+ return block;
+}
+#endif
+
+static
+RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
+ void (*resized)(const char*,
+ uint64_t length,
+ void *host),
+ void *host, uint32_t ram_flags,
+ MemoryRegion *mr, Error **errp)
+{
+ RAMBlock *new_block;
+ Error *local_err = NULL;
+
+ assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC |
+ RAM_NORESERVE)) == 0);
+ assert(!host ^ (ram_flags & RAM_PREALLOC));
+
+ size = HOST_PAGE_ALIGN(size);
+ max_size = HOST_PAGE_ALIGN(max_size);
+ new_block = g_malloc0(sizeof(*new_block));
+ new_block->mr = mr;
+ new_block->resized = resized;
+ new_block->used_length = size;
+ new_block->max_length = max_size;
+ assert(max_size >= size);
+ new_block->fd = -1;
+ new_block->page_size = qemu_real_host_page_size();
+ new_block->host = host;
+ new_block->flags = ram_flags;
+ ram_block_add(new_block, &local_err);
+ if (local_err) {
+ g_free(new_block);
+ error_propagate(errp, local_err);
+ return NULL;
+ }
+ return new_block;
+}
+
+RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
+ MemoryRegion *mr, Error **errp)
+{
+ return qemu_ram_alloc_internal(size, size, NULL, host, RAM_PREALLOC, mr,
+ errp);
+}
+
+RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags,
+ MemoryRegion *mr, Error **errp)
+{
+ assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE)) == 0);
+ return qemu_ram_alloc_internal(size, size, NULL, NULL, ram_flags, mr, errp);
+}
+
+RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
+ void (*resized)(const char*,
+ uint64_t length,
+ void *host),
+ MemoryRegion *mr, Error **errp)
+{
+ return qemu_ram_alloc_internal(size, maxsz, resized, NULL,
+ RAM_RESIZEABLE, mr, errp);
+}
+
+static void reclaim_ramblock(RAMBlock *block)
+{
+ if (block->flags & RAM_PREALLOC) {
+ ;
+ } else if (xen_enabled()) {
+ xen_invalidate_map_cache_entry(block->host);
+#ifndef _WIN32
+ } else if (block->fd >= 0) {
+ qemu_ram_munmap(block->fd, block->host, block->max_length);
+ close(block->fd);
+#endif
+ } else {
+ qemu_anon_ram_free(block->host, block->max_length);
+ }
+ g_free(block);
+}
+
+void qemu_ram_free(RAMBlock *block)
+{
+ if (!block) {
+ return;
+ }
+
+ if (block->host) {
+ ram_block_notify_remove(block->host, block->used_length,
+ block->max_length);
+ }
+
+ qemu_mutex_lock_ramlist();
+ QLIST_REMOVE_RCU(block, next);
+ ram_list.mru_block = NULL;
+ /* Write list before version */
+ smp_wmb();
+ ram_list.version++;
+ call_rcu(block, reclaim_ramblock, rcu);
+ qemu_mutex_unlock_ramlist();
+}
+
+#ifndef _WIN32
+void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
+{
+ RAMBlock *block;
+ ram_addr_t offset;
+ int flags;
+ void *area, *vaddr;
+ int prot;
+
+ RAMBLOCK_FOREACH(block) {
+ offset = addr - block->offset;
+ if (offset < block->max_length) {
+ vaddr = ramblock_ptr(block, offset);
+ if (block->flags & RAM_PREALLOC) {
+ ;
+ } else if (xen_enabled()) {
+ abort();
+ } else {
+ flags = MAP_FIXED;
+ flags |= block->flags & RAM_SHARED ?
+ MAP_SHARED : MAP_PRIVATE;
+ flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0;
+ prot = PROT_READ;
+ prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE;
+ if (block->fd >= 0) {
+ area = mmap(vaddr, length, prot, flags, block->fd,
+ offset + block->fd_offset);
+ } else {
+ flags |= MAP_ANONYMOUS;
+ area = mmap(vaddr, length, prot, flags, -1, 0);
+ }
+ if (area != vaddr) {
+ error_report("Could not remap addr: "
+ RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
+ length, addr);
+ exit(1);
+ }
+ memory_try_enable_merging(vaddr, length);
+ qemu_ram_setup_dump(vaddr, length);
+ }
+ }
+ }
+}
+#endif /* !_WIN32 */
+
+/* Return a host pointer to ram allocated with qemu_ram_alloc.
+ * This should not be used for general purpose DMA. Use address_space_map
+ * or address_space_rw instead. For local memory (e.g. video ram) that the
+ * device owns, use memory_region_get_ram_ptr.
+ *
+ * Called within RCU critical section.
+ */
+void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
+{
+ RAMBlock *block = ram_block;
+
+ if (block == NULL) {
+ block = qemu_get_ram_block(addr);
+ addr -= block->offset;
+ }
+
+ if (xen_enabled() && block->host == NULL) {
+ /* We need to check if the requested address is in the RAM
+ * because we don't want to map the entire memory in QEMU.
+ * In that case just map until the end of the page.
+ */
+ if (block->offset == 0) {
+ return xen_map_cache(addr, 0, 0, false);
+ }
+
+ block->host = xen_map_cache(block->offset, block->max_length, 1, false);
+ }
+ return ramblock_ptr(block, addr);
+}
+
+/* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
+ * but takes a size argument.
+ *
+ * Called within RCU critical section.
+ */
+static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
+ hwaddr *size, bool lock)
+{
+ RAMBlock *block = ram_block;
+ if (*size == 0) {
+ return NULL;
+ }
+
+ if (block == NULL) {
+ block = qemu_get_ram_block(addr);
+ addr -= block->offset;
+ }
+ *size = MIN(*size, block->max_length - addr);
+
+ if (xen_enabled() && block->host == NULL) {
+ /* We need to check if the requested address is in the RAM
+ * because we don't want to map the entire memory in QEMU.
+ * In that case just map the requested area.
+ */
+ if (block->offset == 0) {
+ return xen_map_cache(addr, *size, lock, lock);
+ }
+
+ block->host = xen_map_cache(block->offset, block->max_length, 1, lock);
+ }
+
+ return ramblock_ptr(block, addr);
+}
+
+/* Return the offset of a host pointer within a ramblock */
+ram_addr_t qemu_ram_block_host_offset(RAMBlock *rb, void *host)
+{
+ ram_addr_t res = (uint8_t *)host - (uint8_t *)rb->host;
+ assert((uintptr_t)host >= (uintptr_t)rb->host);
+ assert(res < rb->max_length);
+
+ return res;
+}
+
+/*
+ * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
+ * in that RAMBlock.
+ *
+ * ptr: Host pointer to look up
+ * round_offset: If true round the result offset down to a page boundary
+ * *ram_addr: set to result ram_addr
+ * *offset: set to result offset within the RAMBlock
+ *
+ * Returns: RAMBlock (or NULL if not found)
+ *
+ * By the time this function returns, the returned pointer is not protected
+ * by RCU anymore. If the caller is not within an RCU critical section and
+ * does not hold the iothread lock, it must have other means of protecting the
+ * pointer, such as a reference to the region that includes the incoming
+ * ram_addr_t.
+ */
+RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
+ ram_addr_t *offset)
+{
+ RAMBlock *block;
+ uint8_t *host = ptr;
+
+ if (xen_enabled()) {
+ ram_addr_t ram_addr;
+ RCU_READ_LOCK_GUARD();
+ ram_addr = xen_ram_addr_from_mapcache(ptr);
+ block = qemu_get_ram_block(ram_addr);
+ if (block) {
+ *offset = ram_addr - block->offset;
+ }
+ return block;
+ }
+
+ RCU_READ_LOCK_GUARD();
+ block = qatomic_rcu_read(&ram_list.mru_block);
+ if (block && block->host && host - block->host < block->max_length) {
+ goto found;
+ }
+
+ RAMBLOCK_FOREACH(block) {
+ /* This case happens when the block is not mapped. */
+ if (block->host == NULL) {
+ continue;
+ }
+ if (host - block->host < block->max_length) {
+ goto found;
+ }
+ }
+
+ return NULL;
+
+found:
+ *offset = (host - block->host);
+ if (round_offset) {
+ *offset &= TARGET_PAGE_MASK;
+ }
+ return block;
+}
+
+/*
+ * Finds the named RAMBlock
+ *
+ * name: The name of RAMBlock to find
+ *
+ * Returns: RAMBlock (or NULL if not found)
+ */
+RAMBlock *qemu_ram_block_by_name(const char *name)
+{
+ RAMBlock *block;
+
+ RAMBLOCK_FOREACH(block) {
+ if (!strcmp(name, block->idstr)) {
+ return block;
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * Some of the system routines need to translate from a host pointer
+ * (typically a TLB entry) back to a ram offset.
+ */
+ram_addr_t qemu_ram_addr_from_host(void *ptr)
+{
+ RAMBlock *block;
+ ram_addr_t offset;
+
+ block = qemu_ram_block_from_host(ptr, false, &offset);
+ if (!block) {
+ return RAM_ADDR_INVALID;
+ }
+
+ return block->offset + offset;
+}
+
+ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
+{
+ ram_addr_t ram_addr;
+
+ ram_addr = qemu_ram_addr_from_host(ptr);
+ if (ram_addr == RAM_ADDR_INVALID) {
+ error_report("Bad ram pointer %p", ptr);
+ abort();
+ }
+ return ram_addr;
+}
+
+static MemTxResult flatview_read(FlatView *fv, hwaddr addr,
+ MemTxAttrs attrs, void *buf, hwaddr len);
+static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs,
+ const void *buf, hwaddr len);
+static bool flatview_access_valid(FlatView *fv, hwaddr addr, hwaddr len,
+ bool is_write, MemTxAttrs attrs);
+
+static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
+ unsigned len, MemTxAttrs attrs)
+{
+ subpage_t *subpage = opaque;
+ uint8_t buf[8];
+ MemTxResult res;
+
+#if defined(DEBUG_SUBPAGE)
+ printf("%s: subpage %p len %u addr " HWADDR_FMT_plx "\n", __func__,
+ subpage, len, addr);
+#endif
+ res = flatview_read(subpage->fv, addr + subpage->base, attrs, buf, len);
+ if (res) {
+ return res;
+ }
+ *data = ldn_p(buf, len);
+ return MEMTX_OK;
+}
+
+static MemTxResult subpage_write(void *opaque, hwaddr addr,
+ uint64_t value, unsigned len, MemTxAttrs attrs)
+{
+ subpage_t *subpage = opaque;
+ uint8_t buf[8];
+
+#if defined(DEBUG_SUBPAGE)
+ printf("%s: subpage %p len %u addr " HWADDR_FMT_plx
+ " value %"PRIx64"\n",
+ __func__, subpage, len, addr, value);
+#endif
+ stn_p(buf, len, value);
+ return flatview_write(subpage->fv, addr + subpage->base, attrs, buf, len);
+}
+
+static bool subpage_accepts(void *opaque, hwaddr addr,
+ unsigned len, bool is_write,
+ MemTxAttrs attrs)
+{
+ subpage_t *subpage = opaque;
+#if defined(DEBUG_SUBPAGE)
+ printf("%s: subpage %p %c len %u addr " HWADDR_FMT_plx "\n",
+ __func__, subpage, is_write ? 'w' : 'r', len, addr);
+#endif
+
+ return flatview_access_valid(subpage->fv, addr + subpage->base,
+ len, is_write, attrs);
+}
+
+static const MemoryRegionOps subpage_ops = {
+ .read_with_attrs = subpage_read,
+ .write_with_attrs = subpage_write,
+ .impl.min_access_size = 1,
+ .impl.max_access_size = 8,
+ .valid.min_access_size = 1,
+ .valid.max_access_size = 8,
+ .valid.accepts = subpage_accepts,
+ .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end,
+ uint16_t section)
+{
+ int idx, eidx;
+
+ if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE) {
+ return -1;
+ }
+ idx = SUBPAGE_IDX(start);
+ eidx = SUBPAGE_IDX(end);
+#if defined(DEBUG_SUBPAGE)
+ printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
+ __func__, mmio, start, end, idx, eidx, section);
+#endif
+ for (; idx <= eidx; idx++) {
+ mmio->sub_section[idx] = section;
+ }
+
+ return 0;
+}
+
+static subpage_t *subpage_init(FlatView *fv, hwaddr base)
+{
+ subpage_t *mmio;
+
+ /* mmio->sub_section is set to PHYS_SECTION_UNASSIGNED with g_malloc0 */
+ mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t));
+ mmio->fv = fv;
+ mmio->base = base;
+ memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
+ NULL, TARGET_PAGE_SIZE);
+ mmio->iomem.subpage = true;
+#if defined(DEBUG_SUBPAGE)
+ printf("%s: %p base " HWADDR_FMT_plx " len %08x\n", __func__,
+ mmio, base, TARGET_PAGE_SIZE);
+#endif
+
+ return mmio;
+}
+
+static uint16_t dummy_section(PhysPageMap *map, FlatView *fv, MemoryRegion *mr)
+{
+ assert(fv);
+ MemoryRegionSection section = {
+ .fv = fv,
+ .mr = mr,
+ .offset_within_address_space = 0,
+ .offset_within_region = 0,
+ .size = int128_2_64(),
+ };
+
+ return phys_section_add(map, &section);
+}
+
+MemoryRegionSection *iotlb_to_section(CPUState *cpu,
+ hwaddr index, MemTxAttrs attrs)
+{
+ int asidx = cpu_asidx_from_attrs(cpu, attrs);
+ CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
+ AddressSpaceDispatch *d = cpuas->memory_dispatch;
+ int section_index = index & ~TARGET_PAGE_MASK;
+ MemoryRegionSection *ret;
+
+ assert(section_index < d->map.sections_nb);
+ ret = d->map.sections + section_index;
+ assert(ret->mr);
+ assert(ret->mr->ops);
+
+ return ret;
+}
+
+static void io_mem_init(void)
+{
+ memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
+ NULL, UINT64_MAX);
+}
+
+AddressSpaceDispatch *address_space_dispatch_new(FlatView *fv)
+{
+ AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
+ uint16_t n;
+
+ n = dummy_section(&d->map, fv, &io_mem_unassigned);
+ assert(n == PHYS_SECTION_UNASSIGNED);
+
+ d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
+
+ return d;
+}
+
+void address_space_dispatch_free(AddressSpaceDispatch *d)
+{
+ phys_sections_free(&d->map);
+ g_free(d);
+}
+
+static void do_nothing(CPUState *cpu, run_on_cpu_data d)
+{
+}
+
+static void tcg_log_global_after_sync(MemoryListener *listener)
+{
+ CPUAddressSpace *cpuas;
+
+ /* Wait for the CPU to end the current TB. This avoids the following
+ * incorrect race:
+ *
+ * vCPU migration
+ * ---------------------- -------------------------
+ * TLB check -> slow path
+ * notdirty_mem_write
+ * write to RAM
+ * mark dirty
+ * clear dirty flag
+ * TLB check -> fast path
+ * read memory
+ * write to RAM
+ *
+ * by pushing the migration thread's memory read after the vCPU thread has
+ * written the memory.
+ */
+ if (replay_mode == REPLAY_MODE_NONE) {
+ /*
+ * VGA can make calls to this function while updating the screen.
+ * In record/replay mode this causes a deadlock, because
+ * run_on_cpu waits for rr mutex. Therefore no races are possible
+ * in this case and no need for making run_on_cpu when
+ * record/replay is enabled.
+ */
+ cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
+ run_on_cpu(cpuas->cpu, do_nothing, RUN_ON_CPU_NULL);
+ }
+}
+
+static void tcg_commit_cpu(CPUState *cpu, run_on_cpu_data data)
+{
+ CPUAddressSpace *cpuas = data.host_ptr;
+
+ cpuas->memory_dispatch = address_space_to_dispatch(cpuas->as);
+ tlb_flush(cpu);
+}
+
+static void tcg_commit(MemoryListener *listener)
+{
+ CPUAddressSpace *cpuas;
+ CPUState *cpu;
+
+ assert(tcg_enabled());
+ /* Since each CPU stores ram addresses in its TLB cache, we must
+ * reset the modified entries.
+ */
+ cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
+ cpu = cpuas->cpu;
+
+ /*
+ * Defer changes to as->memory_dispatch until the cpu is quiescent.
+ * Otherwise we race between (1) other cpu threads and (2) ongoing
+ * i/o for the current cpu thread, with data cached by mmu_lookup().
+ *
+ * In addition, queueing the work function will kick the cpu back to
+ * the main loop, which will end the RCU critical section and reclaim
+ * the memory data structures.
+ *
+ * That said, the listener is also called during realize, before
+ * all of the tcg machinery for run_on_cpu is initialized; hence the
+ * check on cpu->halt_cond below.
+ */
+ if (cpu->halt_cond) {
+ async_run_on_cpu(cpu, tcg_commit_cpu, RUN_ON_CPU_HOST_PTR(cpuas));
+ } else {
+ tcg_commit_cpu(cpu, RUN_ON_CPU_HOST_PTR(cpuas));
+ }
+}
+
+static void memory_map_init(void)
+{
+ system_memory = g_malloc(sizeof(*system_memory));
+
+ memory_region_init(system_memory, NULL, "system", UINT64_MAX);
+ address_space_init(&address_space_memory, system_memory, "memory");
+
+ system_io = g_malloc(sizeof(*system_io));
+ memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
+ 65536);
+ address_space_init(&address_space_io, system_io, "I/O");
+}
+
+MemoryRegion *get_system_memory(void)
+{
+ return system_memory;
+}
+
+MemoryRegion *get_system_io(void)
+{
+ return system_io;
+}
+
+static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
+ hwaddr length)
+{
+ uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
+ addr += memory_region_get_ram_addr(mr);
+
+ /* No early return if dirty_log_mask is or becomes 0, because
+ * cpu_physical_memory_set_dirty_range will still call
+ * xen_modified_memory.
+ */
+ if (dirty_log_mask) {
+ dirty_log_mask =
+ cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
+ }
+ if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
+ assert(tcg_enabled());
+ tb_invalidate_phys_range(addr, addr + length - 1);
+ dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
+ }
+ cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
+}
+
+void memory_region_flush_rom_device(MemoryRegion *mr, hwaddr addr, hwaddr size)
+{
+ /*
+ * In principle this function would work on other memory region types too,
+ * but the ROM device use case is the only one where this operation is
+ * necessary. Other memory regions should use the
+ * address_space_read/write() APIs.
+ */
+ assert(memory_region_is_romd(mr));
+
+ invalidate_and_set_dirty(mr, addr, size);
+}
+
+int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
+{
+ unsigned access_size_max = mr->ops->valid.max_access_size;
+
+ /* Regions are assumed to support 1-4 byte accesses unless
+ * otherwise specified.
+ */
+ if (access_size_max == 0) {
+ access_size_max = 4;
+ }
+
+ /* Bound the maximum access by the alignment of the address. */
+ if (!mr->ops->impl.unaligned) {
+ unsigned align_size_max = addr & -addr;
+ if (align_size_max != 0 && align_size_max < access_size_max) {
+ access_size_max = align_size_max;
+ }
+ }
+
+ /* Don't attempt accesses larger than the maximum. */
+ if (l > access_size_max) {
+ l = access_size_max;
+ }
+ l = pow2floor(l);
+
+ return l;
+}
+
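+/*
+ * Worked example for the alignment bound above: addr & -addr isolates
+ * the lowest set bit, i.e. the largest power of two dividing addr, so
+ * for addr == 0x1006 an 8-byte access is first capped to 2 bytes
+ * (0x1006 & -0x1006 == 2), and pow2floor() rounds any remaining
+ * odd length down to a power of two.
+ */
+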
+bool prepare_mmio_access(MemoryRegion *mr)
+{
+ bool release_lock = false;
+
+ if (!qemu_mutex_iothread_locked()) {
+ qemu_mutex_lock_iothread();
+ release_lock = true;
+ }
+ if (mr->flush_coalesced_mmio) {
+ qemu_flush_coalesced_mmio_buffer();
+ }
+
+ return release_lock;
+}
+
+/**
+ * flatview_access_allowed
+ * @mr: #MemoryRegion to be accessed
+ * @attrs: memory transaction attributes
+ * @addr: address within that memory region
+ * @len: the number of bytes to access
+ *
+ * Check if a memory transaction is allowed.
+ *
+ * Returns: true if transaction is allowed, false if denied.
+ */
+static bool flatview_access_allowed(MemoryRegion *mr, MemTxAttrs attrs,
+ hwaddr addr, hwaddr len)
+{
+ if (likely(!attrs.memory)) {
+ return true;
+ }
+ if (memory_region_is_ram(mr)) {
+ return true;
+ }
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "Invalid access to non-RAM device at "
+ "addr 0x%" HWADDR_PRIX ", size %" HWADDR_PRIu ", "
+ "region '%s'\n", addr, len, memory_region_name(mr));
+ return false;
+}
+
+/* Called within RCU critical section. */
+static MemTxResult flatview_write_continue(FlatView *fv, hwaddr addr,
+ MemTxAttrs attrs,
+ const void *ptr,
+ hwaddr len, hwaddr addr1,
+ hwaddr l, MemoryRegion *mr)
+{
+ uint8_t *ram_ptr;
+ uint64_t val;
+ MemTxResult result = MEMTX_OK;
+ bool release_lock = false;
+ const uint8_t *buf = ptr;
+
+ for (;;) {
+ if (!flatview_access_allowed(mr, attrs, addr1, l)) {
+ result |= MEMTX_ACCESS_ERROR;
+ /* Keep going. */
+ } else if (!memory_access_is_direct(mr, true)) {
+ release_lock |= prepare_mmio_access(mr);
+ l = memory_access_size(mr, l, addr1);
+ /* XXX: could force current_cpu to NULL to avoid
+ potential bugs */
+ val = ldn_he_p(buf, l);
+ result |= memory_region_dispatch_write(mr, addr1, val,
+ size_memop(l), attrs);
+ } else {
+ /* RAM case */
+ ram_ptr = qemu_ram_ptr_length(mr->ram_block, addr1, &l, false);
+ memmove(ram_ptr, buf, l);
+ invalidate_and_set_dirty(mr, addr1, l);
+ }
+
+ if (release_lock) {
+ qemu_mutex_unlock_iothread();
+ release_lock = false;
+ }
+
+ len -= l;
+ buf += l;
+ addr += l;
+
+ if (!len) {
+ break;
+ }
+
+ l = len;
+ mr = flatview_translate(fv, addr, &addr1, &l, true, attrs);
+ }
+
+ return result;
+}
+
+/* Called from RCU critical section. */
+static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs,
+ const void *buf, hwaddr len)
+{
+ hwaddr l;
+ hwaddr addr1;
+ MemoryRegion *mr;
+
+ l = len;
+ mr = flatview_translate(fv, addr, &addr1, &l, true, attrs);
+ if (!flatview_access_allowed(mr, attrs, addr, len)) {
+ return MEMTX_ACCESS_ERROR;
+ }
+ return flatview_write_continue(fv, addr, attrs, buf, len,
+ addr1, l, mr);
+}
+
+/* Called within RCU critical section. */
+MemTxResult flatview_read_continue(FlatView *fv, hwaddr addr,
+ MemTxAttrs attrs, void *ptr,
+ hwaddr len, hwaddr addr1, hwaddr l,
+ MemoryRegion *mr)
+{
+ uint8_t *ram_ptr;
+ uint64_t val;
+ MemTxResult result = MEMTX_OK;
+ bool release_lock = false;
+ uint8_t *buf = ptr;
+
+ fuzz_dma_read_cb(addr, len, mr);
+ for (;;) {
+ if (!flatview_access_allowed(mr, attrs, addr1, l)) {
+ result |= MEMTX_ACCESS_ERROR;
+ /* Keep going. */
+ } else if (!memory_access_is_direct(mr, false)) {
+ /* I/O case */
+ release_lock |= prepare_mmio_access(mr);
+ l = memory_access_size(mr, l, addr1);
+ result |= memory_region_dispatch_read(mr, addr1, &val,
+ size_memop(l), attrs);
+ stn_he_p(buf, l, val);
+ } else {
+ /* RAM case */
+ ram_ptr = qemu_ram_ptr_length(mr->ram_block, addr1, &l, false);
+ memcpy(buf, ram_ptr, l);
+ }
+
+ if (release_lock) {
+ qemu_mutex_unlock_iothread();
+ release_lock = false;
+ }
+
+ len -= l;
+ buf += l;
+ addr += l;
+
+ if (!len) {
+ break;
+ }
+
+ l = len;
+ mr = flatview_translate(fv, addr, &addr1, &l, false, attrs);
+ }
+
+ return result;
+}
+
+/* Called from RCU critical section. */
+static MemTxResult flatview_read(FlatView *fv, hwaddr addr,
+ MemTxAttrs attrs, void *buf, hwaddr len)
+{
+ hwaddr l;
+ hwaddr addr1;
+ MemoryRegion *mr;
+
+ l = len;
+ mr = flatview_translate(fv, addr, &addr1, &l, false, attrs);
+ if (!flatview_access_allowed(mr, attrs, addr, len)) {
+ return MEMTX_ACCESS_ERROR;
+ }
+ return flatview_read_continue(fv, addr, attrs, buf, len,
+ addr1, l, mr);
+}
+
+MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
+ MemTxAttrs attrs, void *buf, hwaddr len)
+{
+ MemTxResult result = MEMTX_OK;
+ FlatView *fv;
+
+ if (len > 0) {
+ RCU_READ_LOCK_GUARD();
+ fv = address_space_to_flatview(as);
+ result = flatview_read(fv, addr, attrs, buf, len);
+ }
+
+ return result;
+}
+
+MemTxResult address_space_write(AddressSpace *as, hwaddr addr,
+ MemTxAttrs attrs,
+ const void *buf, hwaddr len)
+{
+ MemTxResult result = MEMTX_OK;
+ FlatView *fv;
+
+ if (len > 0) {
+ RCU_READ_LOCK_GUARD();
+ fv = address_space_to_flatview(as);
+ result = flatview_write(fv, addr, attrs, buf, len);
+ }
+
+ return result;
+}
+
+MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
+ void *buf, hwaddr len, bool is_write)
+{
+ if (is_write) {
+ return address_space_write(as, addr, attrs, buf, len);
+ } else {
+ return address_space_read_full(as, addr, attrs, buf, len);
+ }
+}
+
+MemTxResult address_space_set(AddressSpace *as, hwaddr addr,
+ uint8_t c, hwaddr len, MemTxAttrs attrs)
+{
+#define FILLBUF_SIZE 512
+ uint8_t fillbuf[FILLBUF_SIZE];
+ int l;
+ MemTxResult error = MEMTX_OK;
+
+ memset(fillbuf, c, FILLBUF_SIZE);
+ while (len > 0) {
+ l = len < FILLBUF_SIZE ? len : FILLBUF_SIZE;
+ error |= address_space_write(as, addr, attrs, fillbuf, l);
+ len -= l;
+ addr += l;
+ }
+
+ return error;
+}
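+
+/*
+ * Illustrative usage (not part of this patch): zeroing one page of
+ * guest-physical memory, where "gpa" is a hypothetical address:
+ *
+ *     address_space_set(&address_space_memory, gpa, 0, 4096,
+ *                       MEMTXATTRS_UNSPECIFIED);
+ */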
+
+void cpu_physical_memory_rw(hwaddr addr, void *buf,
+ hwaddr len, bool is_write)
+{
+ address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
+ buf, len, is_write);
+}
+
+enum write_rom_type {
+ WRITE_DATA,
+ FLUSH_CACHE,
+};
+
+static inline MemTxResult address_space_write_rom_internal(AddressSpace *as,
+ hwaddr addr,
+ MemTxAttrs attrs,
+ const void *ptr,
+ hwaddr len,
+ enum write_rom_type type)
+{
+ hwaddr l;
+ uint8_t *ram_ptr;
+ hwaddr addr1;
+ MemoryRegion *mr;
+ const uint8_t *buf = ptr;
+
+ RCU_READ_LOCK_GUARD();
+ while (len > 0) {
+ l = len;
+ mr = address_space_translate(as, addr, &addr1, &l, true, attrs);
+
+ if (!(memory_region_is_ram(mr) ||
+ memory_region_is_romd(mr))) {
+ l = memory_access_size(mr, l, addr1);
+ } else {
+ /* ROM/RAM case */
+ ram_ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
+ switch (type) {
+ case WRITE_DATA:
+ memcpy(ram_ptr, buf, l);
+ invalidate_and_set_dirty(mr, addr1, l);
+ break;
+ case FLUSH_CACHE:
+ flush_idcache_range((uintptr_t)ram_ptr, (uintptr_t)ram_ptr, l);
+ break;
+ }
+ }
+ len -= l;
+ buf += l;
+ addr += l;
+ }
+ return MEMTX_OK;
+}
+
+/* used for ROM loading: can write in RAM and ROM */
+MemTxResult address_space_write_rom(AddressSpace *as, hwaddr addr,
+ MemTxAttrs attrs,
+ const void *buf, hwaddr len)
+{
+ return address_space_write_rom_internal(as, addr, attrs,
+ buf, len, WRITE_DATA);
+}
+
+void cpu_flush_icache_range(hwaddr start, hwaddr len)
+{
+ /*
+ * This function should do the same thing as an icache flush that was
+ * triggered from within the guest. For TCG we are always cache coherent,
+ * so there is no need to flush anything. For KVM / Xen we need to flush
+ * the host's instruction cache at least.
+ */
+ if (tcg_enabled()) {
+ return;
+ }
+
+ address_space_write_rom_internal(&address_space_memory,
+ start, MEMTXATTRS_UNSPECIFIED,
+ NULL, len, FLUSH_CACHE);
+}
+
+typedef struct {
+ MemoryRegion *mr;
+ void *buffer;
+ hwaddr addr;
+ hwaddr len;
+ bool in_use;
+} BounceBuffer;
+
+static BounceBuffer bounce;
+
+typedef struct MapClient {
+ QEMUBH *bh;
+ QLIST_ENTRY(MapClient) link;
+} MapClient;
+
+QemuMutex map_client_list_lock;
+static QLIST_HEAD(, MapClient) map_client_list
+ = QLIST_HEAD_INITIALIZER(map_client_list);
+
+static void cpu_unregister_map_client_do(MapClient *client)
+{
+ QLIST_REMOVE(client, link);
+ g_free(client);
+}
+
+static void cpu_notify_map_clients_locked(void)
+{
+ MapClient *client;
+
+ while (!QLIST_EMPTY(&map_client_list)) {
+ client = QLIST_FIRST(&map_client_list);
+ qemu_bh_schedule(client->bh);
+ cpu_unregister_map_client_do(client);
+ }
+}
+
+void cpu_register_map_client(QEMUBH *bh)
+{
+ MapClient *client = g_malloc(sizeof(*client));
+
+ qemu_mutex_lock(&map_client_list_lock);
+ client->bh = bh;
+ QLIST_INSERT_HEAD(&map_client_list, client, link);
+ /* Write map_client_list before reading in_use. */
+ smp_mb();
+ if (!qatomic_read(&bounce.in_use)) {
+ cpu_notify_map_clients_locked();
+ }
+ qemu_mutex_unlock(&map_client_list_lock);
+}
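+
+/*
+ * Illustrative usage (not part of this patch): a DMA user whose
+ * address_space_map() attempt returned NULL can register a bottom half
+ * to be kicked once the bounce buffer is free again; "retry_dma_bh" and
+ * "opaque" are hypothetical:
+ *
+ *     QEMUBH *bh = qemu_bh_new(retry_dma_bh, opaque);
+ *     cpu_register_map_client(bh);
+ */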
+
+void cpu_exec_init_all(void)
+{
+ qemu_mutex_init(&ram_list.mutex);
+ /* The data structures we set up here depend on knowing the page size,
+ * so no more changes can be made after this point.
+ * In an ideal world, nothing we did before we had finished the
+ * machine setup would care about the target page size, and we could
+ * do this much later, rather than requiring board models to state
+ * up front what their requirements are.
+ */
+ finalize_target_page_bits();
+ io_mem_init();
+ memory_map_init();
+ qemu_mutex_init(&map_client_list_lock);
+}
+
+void cpu_unregister_map_client(QEMUBH *bh)
+{
+ MapClient *client;
+
+ qemu_mutex_lock(&map_client_list_lock);
+ QLIST_FOREACH(client, &map_client_list, link) {
+ if (client->bh == bh) {
+ cpu_unregister_map_client_do(client);
+ break;
+ }
+ }
+ qemu_mutex_unlock(&map_client_list_lock);
+}
+
+static void cpu_notify_map_clients(void)
+{
+ qemu_mutex_lock(&map_client_list_lock);
+ cpu_notify_map_clients_locked();
+ qemu_mutex_unlock(&map_client_list_lock);
+}
+
+static bool flatview_access_valid(FlatView *fv, hwaddr addr, hwaddr len,
+ bool is_write, MemTxAttrs attrs)
+{
+ MemoryRegion *mr;
+ hwaddr l, xlat;
+
+ while (len > 0) {
+ l = len;
+ mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs);
+ if (!memory_access_is_direct(mr, is_write)) {
+ l = memory_access_size(mr, l, addr);
+ if (!memory_region_access_valid(mr, xlat, l, is_write, attrs)) {
+ return false;
+ }
+ }
+
+ len -= l;
+ addr += l;
+ }
+ return true;
+}
+
+bool address_space_access_valid(AddressSpace *as, hwaddr addr,
+ hwaddr len, bool is_write,
+ MemTxAttrs attrs)
+{
+ FlatView *fv;
+
+ RCU_READ_LOCK_GUARD();
+ fv = address_space_to_flatview(as);
+ return flatview_access_valid(fv, addr, len, is_write, attrs);
+}
+
+static hwaddr
+flatview_extend_translation(FlatView *fv, hwaddr addr,
+ hwaddr target_len,
+ MemoryRegion *mr, hwaddr base, hwaddr len,
+ bool is_write, MemTxAttrs attrs)
+{
+ hwaddr done = 0;
+ hwaddr xlat;
+ MemoryRegion *this_mr;
+
+ for (;;) {
+ target_len -= len;
+ addr += len;
+ done += len;
+ if (target_len == 0) {
+ return done;
+ }
+
+ len = target_len;
+ this_mr = flatview_translate(fv, addr, &xlat,
+ &len, is_write, attrs);
+ if (this_mr != mr || xlat != base + done) {
+ return done;
+ }
+ }
+}
+
+/* Map a physical memory region into a host virtual address.
+ * May map a subset of the requested range, given by and returned in *plen.
+ * May return NULL if resources needed to perform the mapping are exhausted.
+ * Use only for reads OR writes - not for read-modify-write operations.
+ * Use cpu_register_map_client() to know when retrying the map operation is
+ * likely to succeed.
+ */
+void *address_space_map(AddressSpace *as,
+ hwaddr addr,
+ hwaddr *plen,
+ bool is_write,
+ MemTxAttrs attrs)
+{
+ hwaddr len = *plen;
+ hwaddr l, xlat;
+ MemoryRegion *mr;
+ FlatView *fv;
+
+ if (len == 0) {
+ return NULL;
+ }
+
+ l = len;
+ RCU_READ_LOCK_GUARD();
+ fv = address_space_to_flatview(as);
+ mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs);
+
+ if (!memory_access_is_direct(mr, is_write)) {
+ if (qatomic_xchg(&bounce.in_use, true)) {
+ *plen = 0;
+ return NULL;
+ }
+ /* Avoid unbounded allocations */
+ l = MIN(l, TARGET_PAGE_SIZE);
+ bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
+ bounce.addr = addr;
+ bounce.len = l;
+
+ memory_region_ref(mr);
+ bounce.mr = mr;
+ if (!is_write) {
+ flatview_read(fv, addr, MEMTXATTRS_UNSPECIFIED,
+ bounce.buffer, l);
+ }
+
+ *plen = l;
+ return bounce.buffer;
+ }
+
+
+ memory_region_ref(mr);
+ *plen = flatview_extend_translation(fv, addr, len, mr, xlat,
+ l, is_write, attrs);
+ fuzz_dma_read_cb(addr, *plen, mr);
+ return qemu_ram_ptr_length(mr->ram_block, xlat, plen, true);
+}
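+
+/*
+ * Illustrative usage (not part of this patch): a plain write mapping,
+ * with hypothetical "gpa" and "want"; note the mapped length may come
+ * back smaller than requested:
+ *
+ *     hwaddr got = want;
+ *     void *p = address_space_map(as, gpa, &got, true,
+ *                                 MEMTXATTRS_UNSPECIFIED);
+ *     if (p) {
+ *         memset(p, 0xff, got);
+ *         address_space_unmap(as, p, got, true, got);
+ *     }
+ */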
+
+/* Unmaps a memory region previously mapped by address_space_map().
+ * Will also mark the memory as dirty if is_write is true. access_len gives
+ * the amount of memory that was actually read or written by the caller.
+ */
+void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
+ bool is_write, hwaddr access_len)
+{
+ if (buffer != bounce.buffer) {
+ MemoryRegion *mr;
+ ram_addr_t addr1;
+
+ mr = memory_region_from_host(buffer, &addr1);
+ assert(mr != NULL);
+ if (is_write) {
+ invalidate_and_set_dirty(mr, addr1, access_len);
+ }
+ if (xen_enabled()) {
+ xen_invalidate_map_cache_entry(buffer);
+ }
+ memory_region_unref(mr);
+ return;
+ }
+ if (is_write) {
+ address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
+ bounce.buffer, access_len);
+ }
+ qemu_vfree(bounce.buffer);
+ bounce.buffer = NULL;
+ memory_region_unref(bounce.mr);
+ /* Clear in_use before reading map_client_list. */
+ qatomic_set_mb(&bounce.in_use, false);
+ cpu_notify_map_clients();
+}
+
+void *cpu_physical_memory_map(hwaddr addr,
+ hwaddr *plen,
+ bool is_write)
+{
+ return address_space_map(&address_space_memory, addr, plen, is_write,
+ MEMTXATTRS_UNSPECIFIED);
+}
+
+void cpu_physical_memory_unmap(void *buffer, hwaddr len,
+ bool is_write, hwaddr access_len)
+{
+ return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
+}
+
+#define ARG1_DECL AddressSpace *as
+#define ARG1 as
+#define SUFFIX
+#define TRANSLATE(...) address_space_translate(as, __VA_ARGS__)
+#define RCU_READ_LOCK(...) rcu_read_lock()
+#define RCU_READ_UNLOCK(...) rcu_read_unlock()
+#include "memory_ldst.c.inc"
+
+int64_t address_space_cache_init(MemoryRegionCache *cache,
+ AddressSpace *as,
+ hwaddr addr,
+ hwaddr len,
+ bool is_write)
+{
+ AddressSpaceDispatch *d;
+ hwaddr l;
+ MemoryRegion *mr;
+ Int128 diff;
+
+ assert(len > 0);
+
+ l = len;
+ cache->fv = address_space_get_flatview(as);
+ d = flatview_to_dispatch(cache->fv);
+ cache->mrs = *address_space_translate_internal(d, addr, &cache->xlat, &l, true);
+
+ /*
+ * cache->xlat is now relative to cache->mrs.mr, not to the section itself.
+ * Take that into account to compute how many bytes are there between
+ * cache->xlat and the end of the section.
+ */
+ diff = int128_sub(cache->mrs.size,
+ int128_make64(cache->xlat - cache->mrs.offset_within_region));
+ l = int128_get64(int128_min(diff, int128_make64(l)));
+
+ mr = cache->mrs.mr;
+ memory_region_ref(mr);
+ if (memory_access_is_direct(mr, is_write)) {
+ /* We don't care about the memory attributes here as we're only
+ * doing this if we found actual RAM, which behaves the same
+ * regardless of attributes; so UNSPECIFIED is fine.
+ */
+ l = flatview_extend_translation(cache->fv, addr, len, mr,
+ cache->xlat, l, is_write,
+ MEMTXATTRS_UNSPECIFIED);
+ cache->ptr = qemu_ram_ptr_length(mr->ram_block, cache->xlat, &l, true);
+ } else {
+ cache->ptr = NULL;
+ }
+
+ cache->len = l;
+ cache->is_write = is_write;
+ return l;
+}
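+
+/*
+ * Illustrative usage (not part of this patch): callers typically cache a
+ * structure the guest accesses repeatedly (e.g. a virtqueue ring) and
+ * then use the _cached accessors on offsets within it; "as", "gpa" and
+ * "len" are hypothetical:
+ *
+ *     MemoryRegionCache cache = MEMORY_REGION_CACHE_INVALID;
+ *     uint16_t v;
+ *
+ *     address_space_cache_init(&cache, as, gpa, len, false);
+ *     address_space_read_cached(&cache, 0, &v, sizeof(v));
+ *     address_space_cache_destroy(&cache);
+ */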
+
+void address_space_cache_invalidate(MemoryRegionCache *cache,
+ hwaddr addr,
+ hwaddr access_len)
+{
+ assert(cache->is_write);
+ if (likely(cache->ptr)) {
+ invalidate_and_set_dirty(cache->mrs.mr, addr + cache->xlat, access_len);
+ }
+}
+
+void address_space_cache_destroy(MemoryRegionCache *cache)
+{
+ if (!cache->mrs.mr) {
+ return;
+ }
+
+ if (xen_enabled()) {
+ xen_invalidate_map_cache_entry(cache->ptr);
+ }
+ memory_region_unref(cache->mrs.mr);
+ flatview_unref(cache->fv);
+ cache->mrs.mr = NULL;
+ cache->fv = NULL;
+}
+
+/* Called from RCU critical section. This function has the same
+ * semantics as address_space_translate, but it only works on a
+ * predefined range of a MemoryRegion that was mapped with
+ * address_space_cache_init.
+ */
+static inline MemoryRegion *address_space_translate_cached(
+ MemoryRegionCache *cache, hwaddr addr, hwaddr *xlat,
+ hwaddr *plen, bool is_write, MemTxAttrs attrs)
+{
+ MemoryRegionSection section;
+ MemoryRegion *mr;
+ IOMMUMemoryRegion *iommu_mr;
+ AddressSpace *target_as;
+
+ assert(!cache->ptr);
+ *xlat = addr + cache->xlat;
+
+ mr = cache->mrs.mr;
+ iommu_mr = memory_region_get_iommu(mr);
+ if (!iommu_mr) {
+ /* MMIO region. */
+ return mr;
+ }
+
+ section = address_space_translate_iommu(iommu_mr, xlat, plen,
+ NULL, is_write, true,
+ &target_as, attrs);
+ return section.mr;
+}
+
+/* Called from RCU critical section. address_space_read_cached uses this
+ * out-of-line function when the target is an MMIO or IOMMU region.
+ */
+MemTxResult
+address_space_read_cached_slow(MemoryRegionCache *cache, hwaddr addr,
+ void *buf, hwaddr len)
+{
+ hwaddr addr1, l;
+ MemoryRegion *mr;
+
+ l = len;
+ mr = address_space_translate_cached(cache, addr, &addr1, &l, false,
+ MEMTXATTRS_UNSPECIFIED);
+ return flatview_read_continue(cache->fv,
+ addr, MEMTXATTRS_UNSPECIFIED, buf, len,
+ addr1, l, mr);
+}
+
+/* Called from RCU critical section. address_space_write_cached uses this
+ * out-of-line function when the target is an MMIO or IOMMU region.
+ */
+MemTxResult
+address_space_write_cached_slow(MemoryRegionCache *cache, hwaddr addr,
+ const void *buf, hwaddr len)
+{
+ hwaddr addr1, l;
+ MemoryRegion *mr;
+
+ l = len;
+ mr = address_space_translate_cached(cache, addr, &addr1, &l, true,
+ MEMTXATTRS_UNSPECIFIED);
+ return flatview_write_continue(cache->fv,
+ addr, MEMTXATTRS_UNSPECIFIED, buf, len,
+ addr1, l, mr);
+}
+
+#define ARG1_DECL MemoryRegionCache *cache
+#define ARG1 cache
+#define SUFFIX _cached_slow
+#define TRANSLATE(...) address_space_translate_cached(cache, __VA_ARGS__)
+#define RCU_READ_LOCK() ((void)0)
+#define RCU_READ_UNLOCK() ((void)0)
+#include "memory_ldst.c.inc"
+
+/* virtual memory access for debug (includes writing to ROM) */
+int cpu_memory_rw_debug(CPUState *cpu, vaddr addr,
+ void *ptr, size_t len, bool is_write)
+{
+ hwaddr phys_addr;
+ vaddr l, page;
+ uint8_t *buf = ptr;
+
+ cpu_synchronize_state(cpu);
+ while (len > 0) {
+ int asidx;
+ MemTxAttrs attrs;
+ MemTxResult res;
+
+ page = addr & TARGET_PAGE_MASK;
+ phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
+ asidx = cpu_asidx_from_attrs(cpu, attrs);
+        /* if no physical page mapped, return an error */
+        if (phys_addr == -1) {
+            return -1;
+        }
+        l = (page + TARGET_PAGE_SIZE) - addr;
+        if (l > len) {
+            l = len;
+        }
+ phys_addr += (addr & ~TARGET_PAGE_MASK);
+ if (is_write) {
+ res = address_space_write_rom(cpu->cpu_ases[asidx].as, phys_addr,
+ attrs, buf, l);
+ } else {
+ res = address_space_read(cpu->cpu_ases[asidx].as, phys_addr,
+ attrs, buf, l);
+ }
+ if (res != MEMTX_OK) {
+ return -1;
+ }
+ len -= l;
+ buf += l;
+ addr += l;
+ }
+ return 0;
+}
+
+/*
+ * Allows code that needs to deal with migration bitmaps etc. to still be
+ * built target-independent.
+ */
+size_t qemu_target_page_size(void)
+{
+ return TARGET_PAGE_SIZE;
+}
+
+int qemu_target_page_mask(void)
+{
+ return TARGET_PAGE_MASK;
+}
+
+int qemu_target_page_bits(void)
+{
+ return TARGET_PAGE_BITS;
+}
+
+int qemu_target_page_bits_min(void)
+{
+ return TARGET_PAGE_BITS_MIN;
+}
+
+/* Convert target pages to MiB (2**20). */
+size_t qemu_target_pages_to_MiB(size_t pages)
+{
+ int page_bits = TARGET_PAGE_BITS;
+
+ /* So far, the largest (non-huge) page size is 64k, i.e. 16 bits. */
+ g_assert(page_bits < 20);
+
+ return pages >> (20 - page_bits);
+}
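+
+/*
+ * Worked example (not part of this patch): with 4 KiB target pages
+ * (page_bits == 12), 262144 pages >> (20 - 12) == 1024 MiB, i.e. 1 GiB.
+ */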
+
+bool cpu_physical_memory_is_io(hwaddr phys_addr)
+{
+    MemoryRegion *mr;
+ hwaddr l = 1;
+
+ RCU_READ_LOCK_GUARD();
+ mr = address_space_translate(&address_space_memory,
+ phys_addr, &phys_addr, &l, false,
+ MEMTXATTRS_UNSPECIFIED);
+
+ return !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
+}
+
+int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
+{
+ RAMBlock *block;
+ int ret = 0;
+
+ RCU_READ_LOCK_GUARD();
+ RAMBLOCK_FOREACH(block) {
+ ret = func(block, opaque);
+ if (ret) {
+ break;
+ }
+ }
+ return ret;
+}
+
+/*
+ * Unmap pages of memory from start to start+length such that
+ * they a) read as 0 and b) trigger whatever fault mechanism
+ * the OS provides for postcopy.
+ * The pages must be unmapped by the end of the function.
+ * Returns: 0 on success, non-0 on failure.
+ */
+int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
+{
+ int ret = -1;
+
+ uint8_t *host_startaddr = rb->host + start;
+
+ if (!QEMU_PTR_IS_ALIGNED(host_startaddr, rb->page_size)) {
+ error_report("ram_block_discard_range: Unaligned start address: %p",
+ host_startaddr);
+ goto err;
+ }
+
+ if ((start + length) <= rb->max_length) {
+ bool need_madvise, need_fallocate;
+ if (!QEMU_IS_ALIGNED(length, rb->page_size)) {
+ error_report("ram_block_discard_range: Unaligned length: %zx",
+ length);
+ goto err;
+ }
+
+ errno = ENOTSUP; /* If we are missing MADVISE etc */
+
+ /* The logic here is messy;
+ * madvise DONTNEED fails for hugepages
+ * fallocate works on hugepages and shmem
+ * shared anonymous memory requires madvise REMOVE
+ */
+ need_madvise = (rb->page_size == qemu_host_page_size);
+ need_fallocate = rb->fd != -1;
+ if (need_fallocate) {
+            /* For a file, this causes the area of the file to be zeroed
+             * when read, and for hugetlbfs also causes it to be unmapped
+             * so a userfault will trigger.
+             */
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+ /*
+ * fallocate() will fail with readonly files. Let's print a
+ * proper error message.
+ */
+ if (rb->flags & RAM_READONLY_FD) {
+ error_report("ram_block_discard_range: Discarding RAM"
+ " with readonly files is not supported");
+ goto err;
+
+ }
+ /*
+ * We'll discard data from the actual file, even though we only
+ * have a MAP_PRIVATE mapping, possibly messing with other
+ * MAP_PRIVATE/MAP_SHARED mappings. There is no easy way to
+             * change that behavior without violating the promised
+ * semantics of ram_block_discard_range().
+ *
+ * Only warn, because it works as long as nobody else uses that
+ * file.
+ */
+ if (!qemu_ram_is_shared(rb)) {
+ warn_report_once("ram_block_discard_range: Discarding RAM"
+ " in private file mappings is possibly"
+ " dangerous, because it will modify the"
+ " underlying file and will affect other"
+ " users of the file");
+ }
+
+ ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ start, length);
+ if (ret) {
+ ret = -errno;
+ error_report("ram_block_discard_range: Failed to fallocate "
+ "%s:%" PRIx64 " +%zx (%d)",
+ rb->idstr, start, length, ret);
+ goto err;
+ }
+#else
+ ret = -ENOSYS;
+ error_report("ram_block_discard_range: fallocate not available/file"
+ "%s:%" PRIx64 " +%zx (%d)",
+ rb->idstr, start, length, ret);
+ goto err;
+#endif
+ }
+ if (need_madvise) {
+ /* For normal RAM this causes it to be unmapped,
+ * for shared memory it causes the local mapping to disappear
+ * and to fall back on the file contents (which we just
+ * fallocate'd away).
+ */
+#if defined(CONFIG_MADVISE)
+ if (qemu_ram_is_shared(rb) && rb->fd < 0) {
+ ret = madvise(host_startaddr, length, QEMU_MADV_REMOVE);
+ } else {
+ ret = madvise(host_startaddr, length, QEMU_MADV_DONTNEED);
+ }
+ if (ret) {
+ ret = -errno;
+ error_report("ram_block_discard_range: Failed to discard range "
+ "%s:%" PRIx64 " +%zx (%d)",
+ rb->idstr, start, length, ret);
+ goto err;
+ }
+#else
+ ret = -ENOSYS;
+ error_report("ram_block_discard_range: MADVISE not available"
+ "%s:%" PRIx64 " +%zx (%d)",
+ rb->idstr, start, length, ret);
+ goto err;
+#endif
+ }
+ trace_ram_block_discard_range(rb->idstr, host_startaddr, length,
+ need_madvise, need_fallocate, ret);
+ } else {
+ error_report("ram_block_discard_range: Overrun block '%s' (%" PRIu64
+ "/%zx/" RAM_ADDR_FMT")",
+ rb->idstr, start, length, rb->max_length);
+ }
+
+err:
+ return ret;
+}
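+
+/*
+ * Illustrative usage (not part of this patch): postcopy discards one
+ * page-size-aligned chunk, with offsets relative to the start of the
+ * RAMBlock; "rb" and "off" are hypothetical:
+ *
+ *     if (ram_block_discard_range(rb, off, rb->page_size)) {
+ *         ... the range may still be mapped; treat this as fatal ...
+ *     }
+ */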
+
+bool ramblock_is_pmem(RAMBlock *rb)
+{
+ return rb->flags & RAM_PMEM;
+}
+
+static void mtree_print_phys_entries(int start, int end, int skip, int ptr)
+{
+ if (start == end - 1) {
+ qemu_printf("\t%3d ", start);
+ } else {
+ qemu_printf("\t%3d..%-3d ", start, end - 1);
+ }
+ qemu_printf(" skip=%d ", skip);
+ if (ptr == PHYS_MAP_NODE_NIL) {
+ qemu_printf(" ptr=NIL");
+ } else if (!skip) {
+ qemu_printf(" ptr=#%d", ptr);
+ } else {
+ qemu_printf(" ptr=[%d]", ptr);
+ }
+ qemu_printf("\n");
+}
+
+#define MR_SIZE(size) (int128_nz(size) ? (hwaddr)int128_get64( \
+ int128_sub((size), int128_one())) : 0)
+
+void mtree_print_dispatch(AddressSpaceDispatch *d, MemoryRegion *root)
+{
+ int i;
+
+ qemu_printf(" Dispatch\n");
+ qemu_printf(" Physical sections\n");
+
+ for (i = 0; i < d->map.sections_nb; ++i) {
+ MemoryRegionSection *s = d->map.sections + i;
+ const char *names[] = { " [unassigned]", " [not dirty]",
+ " [ROM]", " [watch]" };
+
+ qemu_printf(" #%d @" HWADDR_FMT_plx ".." HWADDR_FMT_plx
+ " %s%s%s%s%s",
+ i,
+ s->offset_within_address_space,
+ s->offset_within_address_space + MR_SIZE(s->size),
+ s->mr->name ? s->mr->name : "(noname)",
+ i < ARRAY_SIZE(names) ? names[i] : "",
+ s->mr == root ? " [ROOT]" : "",
+ s == d->mru_section ? " [MRU]" : "",
+ s->mr->is_iommu ? " [iommu]" : "");
+
+ if (s->mr->alias) {
+ qemu_printf(" alias=%s", s->mr->alias->name ?
+ s->mr->alias->name : "noname");
+ }
+ qemu_printf("\n");
+ }
+
+ qemu_printf(" Nodes (%d bits per level, %d levels) ptr=[%d] skip=%d\n",
+ P_L2_BITS, P_L2_LEVELS, d->phys_map.ptr, d->phys_map.skip);
+ for (i = 0; i < d->map.nodes_nb; ++i) {
+ int j, jprev;
+ PhysPageEntry prev;
+ Node *n = d->map.nodes + i;
+
+ qemu_printf(" [%d]\n", i);
+
+ for (j = 0, jprev = 0, prev = *n[0]; j < ARRAY_SIZE(*n); ++j) {
+ PhysPageEntry *pe = *n + j;
+
+ if (pe->ptr == prev.ptr && pe->skip == prev.skip) {
+ continue;
+ }
+
+ mtree_print_phys_entries(jprev, j, prev.skip, prev.ptr);
+
+ jprev = j;
+ prev = *pe;
+ }
+
+ if (jprev != ARRAY_SIZE(*n)) {
+ mtree_print_phys_entries(jprev, j, prev.skip, prev.ptr);
+ }
+ }
+}
+
+/* Require any discards to work. */
+static unsigned int ram_block_discard_required_cnt;
+/* Require only coordinated discards to work. */
+static unsigned int ram_block_coordinated_discard_required_cnt;
+/* Disable any discards. */
+static unsigned int ram_block_discard_disabled_cnt;
+/* Disable only uncoordinated discards. */
+static unsigned int ram_block_uncoordinated_discard_disabled_cnt;
+static QemuMutex ram_block_discard_disable_mutex;
+
+static void ram_block_discard_disable_mutex_lock(void)
+{
+ static gsize initialized;
+
+ if (g_once_init_enter(&initialized)) {
+ qemu_mutex_init(&ram_block_discard_disable_mutex);
+ g_once_init_leave(&initialized, 1);
+ }
+ qemu_mutex_lock(&ram_block_discard_disable_mutex);
+}
+
+static void ram_block_discard_disable_mutex_unlock(void)
+{
+ qemu_mutex_unlock(&ram_block_discard_disable_mutex);
+}
+
+int ram_block_discard_disable(bool state)
+{
+ int ret = 0;
+
+ ram_block_discard_disable_mutex_lock();
+ if (!state) {
+ ram_block_discard_disabled_cnt--;
+ } else if (ram_block_discard_required_cnt ||
+ ram_block_coordinated_discard_required_cnt) {
+ ret = -EBUSY;
+ } else {
+ ram_block_discard_disabled_cnt++;
+ }
+ ram_block_discard_disable_mutex_unlock();
+ return ret;
+}
+
+int ram_block_uncoordinated_discard_disable(bool state)
+{
+ int ret = 0;
+
+ ram_block_discard_disable_mutex_lock();
+ if (!state) {
+ ram_block_uncoordinated_discard_disabled_cnt--;
+ } else if (ram_block_discard_required_cnt) {
+ ret = -EBUSY;
+ } else {
+ ram_block_uncoordinated_discard_disabled_cnt++;
+ }
+ ram_block_discard_disable_mutex_unlock();
+ return ret;
+}
+
+int ram_block_discard_require(bool state)
+{
+ int ret = 0;
+
+ ram_block_discard_disable_mutex_lock();
+ if (!state) {
+ ram_block_discard_required_cnt--;
+ } else if (ram_block_discard_disabled_cnt ||
+ ram_block_uncoordinated_discard_disabled_cnt) {
+ ret = -EBUSY;
+ } else {
+ ram_block_discard_required_cnt++;
+ }
+ ram_block_discard_disable_mutex_unlock();
+ return ret;
+}
+
+int ram_block_coordinated_discard_require(bool state)
+{
+ int ret = 0;
+
+ ram_block_discard_disable_mutex_lock();
+ if (!state) {
+ ram_block_coordinated_discard_required_cnt--;
+ } else if (ram_block_discard_disabled_cnt) {
+ ret = -EBUSY;
+ } else {
+ ram_block_coordinated_discard_required_cnt++;
+ }
+ ram_block_discard_disable_mutex_unlock();
+ return ret;
+}
+
+bool ram_block_discard_is_disabled(void)
+{
+ return qatomic_read(&ram_block_discard_disabled_cnt) ||
+ qatomic_read(&ram_block_uncoordinated_discard_disabled_cnt);
+}
+
+bool ram_block_discard_is_required(void)
+{
+ return qatomic_read(&ram_block_discard_required_cnt) ||
+ qatomic_read(&ram_block_coordinated_discard_required_cnt);
+}
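+
+/*
+ * Illustrative usage (not part of this patch): a backend that relies on
+ * coordinated discards brackets its lifetime with a require/unrequire
+ * pair and fails cleanly if discards were disabled first:
+ *
+ *     if (ram_block_coordinated_discard_require(true)) {
+ *         error_setg(errp, "discarding RAM is disabled");
+ *         return;
+ *     }
+ *     ... use the backend ...
+ *     ram_block_coordinated_discard_require(false);
+ */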
diff --git a/system/qdev-monitor.c b/system/qdev-monitor.c
new file mode 100644
index 0000000..74f4e41
--- /dev/null
+++ b/system/qdev-monitor.c
@@ -0,0 +1,1148 @@
+/*
+ * Dynamic device configuration and creation.
+ *
+ * Copyright (c) 2009 CodeSourcery
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/sysbus.h"
+#include "monitor/hmp.h"
+#include "monitor/monitor.h"
+#include "monitor/qdev.h"
+#include "sysemu/arch_init.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-qdev.h"
+#include "qapi/qmp/dispatch.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qstring.h"
+#include "qapi/qobject-input-visitor.h"
+#include "qemu/config-file.h"
+#include "qemu/error-report.h"
+#include "qemu/help_option.h"
+#include "qemu/option.h"
+#include "qemu/qemu-print.h"
+#include "qemu/option_int.h"
+#include "sysemu/block-backend.h"
+#include "migration/misc.h"
+#include "migration/migration.h"
+#include "qemu/cutils.h"
+#include "hw/qdev-properties.h"
+#include "hw/clock.h"
+#include "hw/boards.h"
+
+/*
+ * Aliases were a bad idea from the start. Let's keep them
+ * from spreading further.
+ */
+typedef struct QDevAlias
+{
+ const char *typename;
+ const char *alias;
+ uint32_t arch_mask;
+} QDevAlias;
+
+/* default virtio transport per architecture */
+#define QEMU_ARCH_VIRTIO_PCI (QEMU_ARCH_ALPHA | QEMU_ARCH_ARM | \
+ QEMU_ARCH_HPPA | QEMU_ARCH_I386 | \
+ QEMU_ARCH_MIPS | QEMU_ARCH_PPC | \
+ QEMU_ARCH_RISCV | QEMU_ARCH_SH4 | \
+ QEMU_ARCH_SPARC | QEMU_ARCH_XTENSA | \
+ QEMU_ARCH_LOONGARCH)
+#define QEMU_ARCH_VIRTIO_CCW (QEMU_ARCH_S390X)
+#define QEMU_ARCH_VIRTIO_MMIO (QEMU_ARCH_M68K)
+
+/* Please keep this table sorted by typename. */
+static const QDevAlias qdev_alias_table[] = {
+ { "AC97", "ac97" }, /* -soundhw name */
+ { "e1000", "e1000-82540em" },
+ { "ES1370", "es1370" }, /* -soundhw name */
+ { "ich9-ahci", "ahci" },
+ { "lsi53c895a", "lsi" },
+ { "virtio-9p-device", "virtio-9p", QEMU_ARCH_VIRTIO_MMIO },
+ { "virtio-9p-ccw", "virtio-9p", QEMU_ARCH_VIRTIO_CCW },
+ { "virtio-9p-pci", "virtio-9p", QEMU_ARCH_VIRTIO_PCI },
+ { "virtio-balloon-device", "virtio-balloon", QEMU_ARCH_VIRTIO_MMIO },
+ { "virtio-balloon-ccw", "virtio-balloon", QEMU_ARCH_VIRTIO_CCW },
+ { "virtio-balloon-pci", "virtio-balloon", QEMU_ARCH_VIRTIO_PCI },
+ { "virtio-blk-device", "virtio-blk", QEMU_ARCH_VIRTIO_MMIO },
+ { "virtio-blk-ccw", "virtio-blk", QEMU_ARCH_VIRTIO_CCW },
+ { "virtio-blk-pci", "virtio-blk", QEMU_ARCH_VIRTIO_PCI },
+ { "virtio-gpu-device", "virtio-gpu", QEMU_ARCH_VIRTIO_MMIO },
+ { "virtio-gpu-ccw", "virtio-gpu", QEMU_ARCH_VIRTIO_CCW },
+ { "virtio-gpu-pci", "virtio-gpu", QEMU_ARCH_VIRTIO_PCI },
+ { "virtio-gpu-gl-device", "virtio-gpu-gl", QEMU_ARCH_VIRTIO_MMIO },
+ { "virtio-gpu-gl-pci", "virtio-gpu-gl", QEMU_ARCH_VIRTIO_PCI },
+ { "virtio-input-host-device", "virtio-input-host", QEMU_ARCH_VIRTIO_MMIO },
+ { "virtio-input-host-ccw", "virtio-input-host", QEMU_ARCH_VIRTIO_CCW },
+ { "virtio-input-host-pci", "virtio-input-host", QEMU_ARCH_VIRTIO_PCI },
+ { "virtio-iommu-pci", "virtio-iommu", QEMU_ARCH_VIRTIO_PCI },
+ { "virtio-keyboard-device", "virtio-keyboard", QEMU_ARCH_VIRTIO_MMIO },
+ { "virtio-keyboard-ccw", "virtio-keyboard", QEMU_ARCH_VIRTIO_CCW },
+ { "virtio-keyboard-pci", "virtio-keyboard", QEMU_ARCH_VIRTIO_PCI },
+ { "virtio-mouse-device", "virtio-mouse", QEMU_ARCH_VIRTIO_MMIO },
+ { "virtio-mouse-ccw", "virtio-mouse", QEMU_ARCH_VIRTIO_CCW },
+ { "virtio-mouse-pci", "virtio-mouse", QEMU_ARCH_VIRTIO_PCI },
+ { "virtio-net-device", "virtio-net", QEMU_ARCH_VIRTIO_MMIO },
+ { "virtio-net-ccw", "virtio-net", QEMU_ARCH_VIRTIO_CCW },
+ { "virtio-net-pci", "virtio-net", QEMU_ARCH_VIRTIO_PCI },
+ { "virtio-rng-device", "virtio-rng", QEMU_ARCH_VIRTIO_MMIO },
+ { "virtio-rng-ccw", "virtio-rng", QEMU_ARCH_VIRTIO_CCW },
+ { "virtio-rng-pci", "virtio-rng", QEMU_ARCH_VIRTIO_PCI },
+ { "virtio-scsi-device", "virtio-scsi", QEMU_ARCH_VIRTIO_MMIO },
+ { "virtio-scsi-ccw", "virtio-scsi", QEMU_ARCH_VIRTIO_CCW },
+ { "virtio-scsi-pci", "virtio-scsi", QEMU_ARCH_VIRTIO_PCI },
+ { "virtio-serial-device", "virtio-serial", QEMU_ARCH_VIRTIO_MMIO },
+ { "virtio-serial-ccw", "virtio-serial", QEMU_ARCH_VIRTIO_CCW },
+ { "virtio-serial-pci", "virtio-serial", QEMU_ARCH_VIRTIO_PCI},
+ { "virtio-tablet-device", "virtio-tablet", QEMU_ARCH_VIRTIO_MMIO },
+ { "virtio-tablet-ccw", "virtio-tablet", QEMU_ARCH_VIRTIO_CCW },
+ { "virtio-tablet-pci", "virtio-tablet", QEMU_ARCH_VIRTIO_PCI },
+ { }
+};
+
+static const char *qdev_class_get_alias(DeviceClass *dc)
+{
+ const char *typename = object_class_get_name(OBJECT_CLASS(dc));
+ int i;
+
+ for (i = 0; qdev_alias_table[i].typename; i++) {
+ if (qdev_alias_table[i].arch_mask &&
+ !(qdev_alias_table[i].arch_mask & arch_type)) {
+ continue;
+ }
+
+ if (strcmp(qdev_alias_table[i].typename, typename) == 0) {
+ return qdev_alias_table[i].alias;
+ }
+ }
+
+ return NULL;
+}
+
+static bool qdev_class_has_alias(DeviceClass *dc)
+{
+ return (qdev_class_get_alias(dc) != NULL);
+}
+
+static void qdev_print_devinfo(DeviceClass *dc)
+{
+ qemu_printf("name \"%s\"", object_class_get_name(OBJECT_CLASS(dc)));
+ if (dc->bus_type) {
+ qemu_printf(", bus %s", dc->bus_type);
+ }
+ if (qdev_class_has_alias(dc)) {
+ qemu_printf(", alias \"%s\"", qdev_class_get_alias(dc));
+ }
+ if (dc->desc) {
+ qemu_printf(", desc \"%s\"", dc->desc);
+ }
+ if (!dc->user_creatable) {
+ qemu_printf(", no-user");
+ }
+ qemu_printf("\n");
+}
+
+static void qdev_print_devinfos(bool show_no_user)
+{
+ static const char *cat_name[DEVICE_CATEGORY_MAX + 1] = {
+ [DEVICE_CATEGORY_BRIDGE] = "Controller/Bridge/Hub",
+ [DEVICE_CATEGORY_USB] = "USB",
+ [DEVICE_CATEGORY_STORAGE] = "Storage",
+ [DEVICE_CATEGORY_NETWORK] = "Network",
+ [DEVICE_CATEGORY_INPUT] = "Input",
+ [DEVICE_CATEGORY_DISPLAY] = "Display",
+ [DEVICE_CATEGORY_SOUND] = "Sound",
+ [DEVICE_CATEGORY_MISC] = "Misc",
+ [DEVICE_CATEGORY_CPU] = "CPU",
+        [DEVICE_CATEGORY_WATCHDOG] = "Watchdog",
+ [DEVICE_CATEGORY_MAX] = "Uncategorized",
+ };
+ GSList *list, *elt;
+ int i;
+ bool cat_printed;
+
+ module_load_qom_all();
+ list = object_class_get_list_sorted(TYPE_DEVICE, false);
+
+ for (i = 0; i <= DEVICE_CATEGORY_MAX; i++) {
+ cat_printed = false;
+ for (elt = list; elt; elt = elt->next) {
+ DeviceClass *dc = OBJECT_CLASS_CHECK(DeviceClass, elt->data,
+ TYPE_DEVICE);
+ if ((i < DEVICE_CATEGORY_MAX
+ ? !test_bit(i, dc->categories)
+ : !bitmap_empty(dc->categories, DEVICE_CATEGORY_MAX))
+ || (!show_no_user
+ && !dc->user_creatable)) {
+ continue;
+ }
+ if (!cat_printed) {
+ qemu_printf("%s%s devices:\n", i ? "\n" : "", cat_name[i]);
+ cat_printed = true;
+ }
+ qdev_print_devinfo(dc);
+ }
+ }
+
+ g_slist_free(list);
+}
+
+static const char *find_typename_by_alias(const char *alias)
+{
+ int i;
+
+ for (i = 0; qdev_alias_table[i].alias; i++) {
+ if (qdev_alias_table[i].arch_mask &&
+ !(qdev_alias_table[i].arch_mask & arch_type)) {
+ continue;
+ }
+
+ if (strcmp(qdev_alias_table[i].alias, alias) == 0) {
+ return qdev_alias_table[i].typename;
+ }
+ }
+
+ return NULL;
+}
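+
+/*
+ * Illustrative example (not part of this patch): on an x86 binary
+ * (arch_type == QEMU_ARCH_I386), "-device virtio-net" resolves through
+ * the alias table to "virtio-net-pci", while on s390x the same alias
+ * picks "virtio-net-ccw".
+ */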
+
+static DeviceClass *qdev_get_device_class(const char **driver, Error **errp)
+{
+ ObjectClass *oc;
+ DeviceClass *dc;
+ const char *original_name = *driver;
+
+ oc = module_object_class_by_name(*driver);
+ if (!oc) {
+ const char *typename = find_typename_by_alias(*driver);
+
+ if (typename) {
+ *driver = typename;
+ oc = module_object_class_by_name(*driver);
+ }
+ }
+
+ if (!object_class_dynamic_cast(oc, TYPE_DEVICE)) {
+ if (*driver != original_name) {
+ error_setg(errp, "'%s' (alias '%s') is not a valid device model"
+ " name", original_name, *driver);
+ } else {
+ error_setg(errp, "'%s' is not a valid device model name", *driver);
+ }
+ return NULL;
+ }
+
+ if (object_class_is_abstract(oc)) {
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "driver",
+ "a non-abstract device type");
+ return NULL;
+ }
+
+ dc = DEVICE_CLASS(oc);
+ if (!dc->user_creatable ||
+ (phase_check(PHASE_MACHINE_READY) && !dc->hotpluggable)) {
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "driver",
+ "a pluggable device type");
+ return NULL;
+ }
+
+ if (object_class_dynamic_cast(oc, TYPE_SYS_BUS_DEVICE)) {
+ /* sysbus devices need to be allowed by the machine */
+ MachineClass *mc = MACHINE_CLASS(object_get_class(qdev_get_machine()));
+ if (!device_type_is_dynamic_sysbus(mc, *driver)) {
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "driver",
+ "a dynamic sysbus device type for the machine");
+ return NULL;
+ }
+ }
+
+ return dc;
+}
+
+
+int qdev_device_help(QemuOpts *opts)
+{
+ Error *local_err = NULL;
+ const char *driver;
+ ObjectPropertyInfoList *prop_list;
+ ObjectPropertyInfoList *prop;
+ GPtrArray *array;
+ int i;
+
+ driver = qemu_opt_get(opts, "driver");
+ if (driver && is_help_option(driver)) {
+ qdev_print_devinfos(false);
+ return 1;
+ }
+
+ if (!driver || !qemu_opt_has_help_opt(opts)) {
+ return 0;
+ }
+
+ if (!object_class_by_name(driver)) {
+ const char *typename = find_typename_by_alias(driver);
+
+ if (typename) {
+ driver = typename;
+ }
+ }
+
+ prop_list = qmp_device_list_properties(driver, &local_err);
+ if (local_err) {
+ goto error;
+ }
+
+ if (prop_list) {
+ qemu_printf("%s options:\n", driver);
+ } else {
+ qemu_printf("There are no options for %s.\n", driver);
+ }
+ array = g_ptr_array_new();
+ for (prop = prop_list; prop; prop = prop->next) {
+ g_ptr_array_add(array,
+ object_property_help(prop->value->name,
+ prop->value->type,
+ prop->value->default_value,
+ prop->value->description));
+ }
+ g_ptr_array_sort(array, (GCompareFunc)qemu_pstrcmp0);
+ for (i = 0; i < array->len; i++) {
+ qemu_printf("%s\n", (char *)array->pdata[i]);
+ }
+ g_ptr_array_set_free_func(array, g_free);
+ g_ptr_array_free(array, true);
+ qapi_free_ObjectPropertyInfoList(prop_list);
+ return 1;
+
+error:
+ error_report_err(local_err);
+ return 1;
+}
+
+static Object *qdev_get_peripheral(void)
+{
+ static Object *dev;
+
+ if (dev == NULL) {
+ dev = container_get(qdev_get_machine(), "/peripheral");
+ }
+
+ return dev;
+}
+
+static Object *qdev_get_peripheral_anon(void)
+{
+ static Object *dev;
+
+ if (dev == NULL) {
+ dev = container_get(qdev_get_machine(), "/peripheral-anon");
+ }
+
+ return dev;
+}
+
+static void qbus_error_append_bus_list_hint(DeviceState *dev,
+ Error *const *errp)
+{
+ BusState *child;
+ const char *sep = " ";
+
+ error_append_hint(errp, "child buses at \"%s\":",
+ dev->id ? dev->id : object_get_typename(OBJECT(dev)));
+ QLIST_FOREACH(child, &dev->child_bus, sibling) {
+ error_append_hint(errp, "%s\"%s\"", sep, child->name);
+ sep = ", ";
+ }
+ error_append_hint(errp, "\n");
+}
+
+static void qbus_error_append_dev_list_hint(BusState *bus,
+ Error *const *errp)
+{
+ BusChild *kid;
+ const char *sep = " ";
+
+ error_append_hint(errp, "devices at \"%s\":", bus->name);
+ QTAILQ_FOREACH(kid, &bus->children, sibling) {
+ DeviceState *dev = kid->child;
+ error_append_hint(errp, "%s\"%s\"", sep,
+ object_get_typename(OBJECT(dev)));
+ if (dev->id) {
+ error_append_hint(errp, "/\"%s\"", dev->id);
+ }
+ sep = ", ";
+ }
+ error_append_hint(errp, "\n");
+}
+
+static BusState *qbus_find_bus(DeviceState *dev, char *elem)
+{
+ BusState *child;
+
+ QLIST_FOREACH(child, &dev->child_bus, sibling) {
+ if (strcmp(child->name, elem) == 0) {
+ return child;
+ }
+ }
+ return NULL;
+}
+
+static DeviceState *qbus_find_dev(BusState *bus, char *elem)
+{
+ BusChild *kid;
+
+ /*
+ * try to match in order:
+ * (1) instance id, if present
+ * (2) driver name
+ * (3) driver alias, if present
+ */
+ QTAILQ_FOREACH(kid, &bus->children, sibling) {
+ DeviceState *dev = kid->child;
+ if (dev->id && strcmp(dev->id, elem) == 0) {
+ return dev;
+ }
+ }
+ QTAILQ_FOREACH(kid, &bus->children, sibling) {
+ DeviceState *dev = kid->child;
+ if (strcmp(object_get_typename(OBJECT(dev)), elem) == 0) {
+ return dev;
+ }
+ }
+ QTAILQ_FOREACH(kid, &bus->children, sibling) {
+ DeviceState *dev = kid->child;
+ DeviceClass *dc = DEVICE_GET_CLASS(dev);
+
+ if (qdev_class_has_alias(dc) &&
+ strcmp(qdev_class_get_alias(dc), elem) == 0) {
+ return dev;
+ }
+ }
+ return NULL;
+}
+
+static inline bool qbus_is_full(BusState *bus)
+{
+ BusClass *bus_class;
+
+ if (bus->full) {
+ return true;
+ }
+ bus_class = BUS_GET_CLASS(bus);
+ return bus_class->max_dev && bus->num_children >= bus_class->max_dev;
+}
+
+/*
+ * Search the tree rooted at @bus for a bus.
+ * If @name, search for a bus with that name. Note that bus names
+ * need not be unique. Yes, that's screwed up.
+ * Else search for a bus that is a subtype of @bus_typename.
+ * If more than one exists, prefer one that can take another device.
+ * Return the bus if found, else %NULL.
+ */
+static BusState *qbus_find_recursive(BusState *bus, const char *name,
+ const char *bus_typename)
+{
+ BusChild *kid;
+ BusState *pick, *child, *ret;
+ bool match;
+
+ assert(name || bus_typename);
+ if (name) {
+ match = !strcmp(bus->name, name);
+ } else {
+ match = !!object_dynamic_cast(OBJECT(bus), bus_typename);
+ }
+
+ if (match && !qbus_is_full(bus)) {
+ return bus; /* root matches and isn't full */
+ }
+
+ pick = match ? bus : NULL;
+
+ QTAILQ_FOREACH(kid, &bus->children, sibling) {
+ DeviceState *dev = kid->child;
+ QLIST_FOREACH(child, &dev->child_bus, sibling) {
+ ret = qbus_find_recursive(child, name, bus_typename);
+ if (ret && !qbus_is_full(ret)) {
+ return ret; /* a descendant matches and isn't full */
+ }
+ if (ret && !pick) {
+ pick = ret;
+ }
+ }
+ }
+
+ /* root or a descendant matches, but is full */
+ return pick;
+}
+
+static BusState *qbus_find(const char *path, Error **errp)
+{
+ DeviceState *dev;
+ BusState *bus;
+ char elem[128];
+ int pos, len;
+
+ /* find start element */
+ if (path[0] == '/') {
+ bus = sysbus_get_default();
+ pos = 0;
+ } else {
+ if (sscanf(path, "%127[^/]%n", elem, &len) != 1) {
+ assert(!path[0]);
+ elem[0] = len = 0;
+ }
+ bus = qbus_find_recursive(sysbus_get_default(), elem, NULL);
+ if (!bus) {
+ error_setg(errp, "Bus '%s' not found", elem);
+ return NULL;
+ }
+ pos = len;
+ }
+
+ for (;;) {
+ assert(path[pos] == '/' || !path[pos]);
+ while (path[pos] == '/') {
+ pos++;
+ }
+ if (path[pos] == '\0') {
+ break;
+ }
+
+ /* find device */
+ if (sscanf(path+pos, "%127[^/]%n", elem, &len) != 1) {
+ g_assert_not_reached();
+ elem[0] = len = 0;
+ }
+ pos += len;
+ dev = qbus_find_dev(bus, elem);
+ if (!dev) {
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ "Device '%s' not found", elem);
+ qbus_error_append_dev_list_hint(bus, errp);
+ return NULL;
+ }
+
+ assert(path[pos] == '/' || !path[pos]);
+ while (path[pos] == '/') {
+ pos++;
+ }
+ if (path[pos] == '\0') {
+            /* The last specified element is a device. If it has exactly
+             * one child bus, accept it nevertheless. */
+ if (dev->num_child_bus == 1) {
+ bus = QLIST_FIRST(&dev->child_bus);
+ break;
+ }
+ if (dev->num_child_bus) {
+ error_setg(errp, "Device '%s' has multiple child buses",
+ elem);
+ qbus_error_append_bus_list_hint(dev, errp);
+ } else {
+ error_setg(errp, "Device '%s' has no child bus", elem);
+ }
+ return NULL;
+ }
+
+ /* find bus */
+ if (sscanf(path+pos, "%127[^/]%n", elem, &len) != 1) {
+ g_assert_not_reached();
+ elem[0] = len = 0;
+ }
+ pos += len;
+ bus = qbus_find_bus(dev, elem);
+ if (!bus) {
+ error_setg(errp, "Bus '%s' not found", elem);
+ qbus_error_append_bus_list_hint(dev, errp);
+ return NULL;
+ }
+ }
+
+ if (qbus_is_full(bus)) {
+ error_setg(errp, "Bus '%s' is full", path);
+ return NULL;
+ }
+ return bus;
+}
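+
+/*
+ * Illustrative bus paths accepted above (not part of this patch); the
+ * device and bus names are hypothetical:
+ *
+ *     pci.0           - a bus anywhere in the tree, found by name
+ *     pci.0/usb-hub   - that bus, then a device on it (accepted when the
+ *                       device has exactly one child bus)
+ *     /i440fx/pci.0   - absolute: a device on the main system bus, then
+ *                       one of its child buses
+ */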
+
+/* Takes ownership of @id; it will be freed when the device is deleted */
+const char *qdev_set_id(DeviceState *dev, char *id, Error **errp)
+{
+ ObjectProperty *prop;
+
+ assert(!dev->id && !dev->realized);
+
+ /*
+ * object_property_[try_]add_child() below will assert the device
+ * has no parent
+ */
+ if (id) {
+ prop = object_property_try_add_child(qdev_get_peripheral(), id,
+ OBJECT(dev), NULL);
+ if (prop) {
+ dev->id = id;
+ } else {
+ error_setg(errp, "Duplicate device ID '%s'", id);
+ g_free(id);
+ return NULL;
+ }
+ } else {
+ static int anon_count;
+ gchar *name = g_strdup_printf("device[%d]", anon_count++);
+ prop = object_property_add_child(qdev_get_peripheral_anon(), name,
+ OBJECT(dev));
+ g_free(name);
+ }
+
+ return prop->name;
+}
+
+DeviceState *qdev_device_add_from_qdict(const QDict *opts,
+ bool from_json, Error **errp)
+{
+ ERRP_GUARD();
+ DeviceClass *dc;
+ const char *driver, *path;
+ char *id;
+ DeviceState *dev = NULL;
+ BusState *bus = NULL;
+
+ driver = qdict_get_try_str(opts, "driver");
+ if (!driver) {
+ error_setg(errp, QERR_MISSING_PARAMETER, "driver");
+ return NULL;
+ }
+
+ /* find driver */
+ dc = qdev_get_device_class(&driver, errp);
+ if (!dc) {
+ return NULL;
+ }
+
+ /* find bus */
+ path = qdict_get_try_str(opts, "bus");
+ if (path != NULL) {
+ bus = qbus_find(path, errp);
+ if (!bus) {
+ return NULL;
+ }
+ if (!object_dynamic_cast(OBJECT(bus), dc->bus_type)) {
+ error_setg(errp, "Device '%s' can't go on %s bus",
+ driver, object_get_typename(OBJECT(bus)));
+ return NULL;
+ }
+ } else if (dc->bus_type != NULL) {
+ bus = qbus_find_recursive(sysbus_get_default(), NULL, dc->bus_type);
+ if (!bus || qbus_is_full(bus)) {
+ error_setg(errp, "No '%s' bus found for device '%s'",
+ dc->bus_type, driver);
+ return NULL;
+ }
+ }
+
+ if (qdev_should_hide_device(opts, from_json, errp)) {
+ if (bus && !qbus_is_hotpluggable(bus)) {
+ error_setg(errp, QERR_BUS_NO_HOTPLUG, bus->name);
+ }
+ return NULL;
+ } else if (*errp) {
+ return NULL;
+ }
+
+ if (phase_check(PHASE_MACHINE_READY) && bus && !qbus_is_hotpluggable(bus)) {
+ error_setg(errp, QERR_BUS_NO_HOTPLUG, bus->name);
+ return NULL;
+ }
+
+ if (!migration_is_idle()) {
+ error_setg(errp, "device_add not allowed while migrating");
+ return NULL;
+ }
+
+ /* create device */
+ dev = qdev_new(driver);
+
+ /* Check whether the hotplug is allowed by the machine */
+ if (phase_check(PHASE_MACHINE_READY)) {
+ if (!qdev_hotplug_allowed(dev, errp)) {
+ goto err_del_dev;
+ }
+
+ if (!bus && !qdev_get_machine_hotplug_handler(dev)) {
+ /* No bus, no machine hotplug handler --> device is not hotpluggable */
+ error_setg(errp, "Device '%s' can not be hotplugged on this machine",
+ driver);
+ goto err_del_dev;
+ }
+ }
+
+ /*
+ * set dev's parent and register its id.
+ * If it fails it means the id is already taken.
+ */
+ id = g_strdup(qdict_get_try_str(opts, "id"));
+ if (!qdev_set_id(dev, id, errp)) {
+ goto err_del_dev;
+ }
+
+ /* set properties */
+ dev->opts = qdict_clone_shallow(opts);
+ qdict_del(dev->opts, "driver");
+ qdict_del(dev->opts, "bus");
+ qdict_del(dev->opts, "id");
+
+ object_set_properties_from_keyval(&dev->parent_obj, dev->opts, from_json,
+ errp);
+ if (*errp) {
+ goto err_del_dev;
+ }
+
+ if (!qdev_realize(dev, bus, errp)) {
+ goto err_del_dev;
+ }
+ return dev;
+
+err_del_dev:
+ if (dev) {
+ object_unparent(OBJECT(dev));
+ object_unref(OBJECT(dev));
+ }
+ return NULL;
+}
+
+/* Takes ownership of @opts on success */
+DeviceState *qdev_device_add(QemuOpts *opts, Error **errp)
+{
+ QDict *qdict = qemu_opts_to_qdict(opts, NULL);
+ DeviceState *ret;
+
+ ret = qdev_device_add_from_qdict(qdict, false, errp);
+ if (ret) {
+ qemu_opts_del(opts);
+ }
+ qobject_unref(qdict);
+ return ret;
+}
+
+#define qdev_printf(fmt, ...) monitor_printf(mon, "%*s" fmt, indent, "", ## __VA_ARGS__)
+static void qbus_print(Monitor *mon, BusState *bus, int indent);
+
+static void qdev_print_props(Monitor *mon, DeviceState *dev, Property *props,
+ int indent)
+{
+    if (!props) {
+        return;
+    }
+ for (; props->name; props++) {
+ char *value;
+ char *legacy_name = g_strdup_printf("legacy-%s", props->name);
+
+ if (object_property_get_type(OBJECT(dev), legacy_name, NULL)) {
+ value = object_property_get_str(OBJECT(dev), legacy_name, NULL);
+ } else {
+ value = object_property_print(OBJECT(dev), props->name, true,
+ NULL);
+ }
+ g_free(legacy_name);
+
+ if (!value) {
+ continue;
+ }
+ qdev_printf("%s = %s\n", props->name,
+ *value ? value : "<null>");
+ g_free(value);
+ }
+}
+
+static void bus_print_dev(BusState *bus, Monitor *mon, DeviceState *dev, int indent)
+{
+ BusClass *bc = BUS_GET_CLASS(bus);
+
+ if (bc->print_dev) {
+ bc->print_dev(mon, dev, indent);
+ }
+}
+
+static void qdev_print(Monitor *mon, DeviceState *dev, int indent)
+{
+ ObjectClass *class;
+ BusState *child;
+ NamedGPIOList *ngl;
+ NamedClockList *ncl;
+
+ qdev_printf("dev: %s, id \"%s\"\n", object_get_typename(OBJECT(dev)),
+ dev->id ? dev->id : "");
+ indent += 2;
+ QLIST_FOREACH(ngl, &dev->gpios, node) {
+ if (ngl->num_in) {
+ qdev_printf("gpio-in \"%s\" %d\n", ngl->name ? ngl->name : "",
+ ngl->num_in);
+ }
+ if (ngl->num_out) {
+ qdev_printf("gpio-out \"%s\" %d\n", ngl->name ? ngl->name : "",
+ ngl->num_out);
+ }
+ }
+ QLIST_FOREACH(ncl, &dev->clocks, node) {
+ g_autofree char *freq_str = clock_display_freq(ncl->clock);
+ qdev_printf("clock-%s%s \"%s\" freq_hz=%s\n",
+ ncl->output ? "out" : "in",
+ ncl->alias ? " (alias)" : "",
+ ncl->name, freq_str);
+ }
+ class = object_get_class(OBJECT(dev));
+ do {
+ qdev_print_props(mon, dev, DEVICE_CLASS(class)->props_, indent);
+ class = object_class_get_parent(class);
+ } while (class != object_class_by_name(TYPE_DEVICE));
+ bus_print_dev(dev->parent_bus, mon, dev, indent);
+ QLIST_FOREACH(child, &dev->child_bus, sibling) {
+ qbus_print(mon, child, indent);
+ }
+}
+
+static void qbus_print(Monitor *mon, BusState *bus, int indent)
+{
+ BusChild *kid;
+
+ qdev_printf("bus: %s\n", bus->name);
+ indent += 2;
+ qdev_printf("type %s\n", object_get_typename(OBJECT(bus)));
+ QTAILQ_FOREACH(kid, &bus->children, sibling) {
+ DeviceState *dev = kid->child;
+ qdev_print(mon, dev, indent);
+ }
+}
+#undef qdev_printf
+
+void hmp_info_qtree(Monitor *mon, const QDict *qdict)
+{
+    if (sysbus_get_default()) {
+        qbus_print(mon, sysbus_get_default(), 0);
+    }
+}
+
+void hmp_info_qdm(Monitor *mon, const QDict *qdict)
+{
+ qdev_print_devinfos(true);
+}
+
+void qmp_device_add(QDict *qdict, QObject **ret_data, Error **errp)
+{
+ QemuOpts *opts;
+ DeviceState *dev;
+
+ opts = qemu_opts_from_qdict(qemu_find_opts("device"), qdict, errp);
+ if (!opts) {
+ return;
+ }
+ if (!monitor_cur_is_qmp() && qdev_device_help(opts)) {
+ qemu_opts_del(opts);
+ return;
+ }
+ dev = qdev_device_add(opts, errp);
+
+    /*
+     * Drain all pending RCU callbacks. Some bus-related operations can
+     * defer a device removal to an RCU callback (here that can happen
+     * if the device is added and then removed again because of a
+     * configuration error), but the user may expect this interface to
+     * have finished its job completely by the time the QMP command
+     * returns its result.
+     */
+ drain_call_rcu();
+
+ if (!dev) {
+ qemu_opts_del(opts);
+ return;
+ }
+ object_unref(OBJECT(dev));
+}
+
+static DeviceState *find_device_state(const char *id, Error **errp)
+{
+ Object *obj = object_resolve_path_at(qdev_get_peripheral(), id);
+ DeviceState *dev;
+
+ if (!obj) {
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ "Device '%s' not found", id);
+ return NULL;
+ }
+
+ dev = (DeviceState *)object_dynamic_cast(obj, TYPE_DEVICE);
+ if (!dev) {
+ error_setg(errp, "%s is not a hotpluggable device", id);
+ return NULL;
+ }
+
+ return dev;
+}
+
+void qdev_unplug(DeviceState *dev, Error **errp)
+{
+ DeviceClass *dc = DEVICE_GET_CLASS(dev);
+ HotplugHandler *hotplug_ctrl;
+ HotplugHandlerClass *hdc;
+ Error *local_err = NULL;
+
+ if (qdev_unplug_blocked(dev, errp)) {
+ return;
+ }
+
+ if (dev->parent_bus && !qbus_is_hotpluggable(dev->parent_bus)) {
+ error_setg(errp, QERR_BUS_NO_HOTPLUG, dev->parent_bus->name);
+ return;
+ }
+
+ if (!dc->hotpluggable) {
+ error_setg(errp, QERR_DEVICE_NO_HOTPLUG,
+ object_get_typename(OBJECT(dev)));
+ return;
+ }
+
+ if (!migration_is_idle() && !dev->allow_unplug_during_migration) {
+ error_setg(errp, "device_del not allowed while migrating");
+ return;
+ }
+
+ qdev_hot_removed = true;
+
+ hotplug_ctrl = qdev_get_hotplug_handler(dev);
+    /* A hotpluggable device MUST have a HotplugHandler; if it doesn't,
+     * something is very wrong with it. */
+ g_assert(hotplug_ctrl);
+
+    /* If the device supports async unplug, just request it to be done;
+     * otherwise remove it synchronously. */
+ hdc = HOTPLUG_HANDLER_GET_CLASS(hotplug_ctrl);
+ if (hdc->unplug_request) {
+ hotplug_handler_unplug_request(hotplug_ctrl, dev, &local_err);
+ } else {
+ hotplug_handler_unplug(hotplug_ctrl, dev, &local_err);
+ if (!local_err) {
+ object_unparent(OBJECT(dev));
+ }
+ }
+ error_propagate(errp, local_err);
+}
+
+void qmp_device_del(const char *id, Error **errp)
+{
+ DeviceState *dev = find_device_state(id, errp);
+ if (dev != NULL) {
+ if (dev->pending_deleted_event &&
+ (dev->pending_deleted_expires_ms == 0 ||
+ dev->pending_deleted_expires_ms > qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL))) {
+ error_setg(errp, "Device %s is already in the "
+ "process of unplug", id);
+ return;
+ }
+
+ qdev_unplug(dev, errp);
+ }
+}
+
+void hmp_device_add(Monitor *mon, const QDict *qdict)
+{
+ Error *err = NULL;
+
+ qmp_device_add((QDict *)qdict, NULL, &err);
+ hmp_handle_error(mon, err);
+}
+
+void hmp_device_del(Monitor *mon, const QDict *qdict)
+{
+ const char *id = qdict_get_str(qdict, "id");
+ Error *err = NULL;
+
+ qmp_device_del(id, &err);
+ hmp_handle_error(mon, err);
+}
+
+void device_add_completion(ReadLineState *rs, int nb_args, const char *str)
+{
+ GSList *list, *elt;
+ size_t len;
+
+ if (nb_args != 2) {
+ return;
+ }
+
+ len = strlen(str);
+ readline_set_completion_index(rs, len);
+ list = elt = object_class_get_list(TYPE_DEVICE, false);
+ while (elt) {
+ DeviceClass *dc = OBJECT_CLASS_CHECK(DeviceClass, elt->data,
+ TYPE_DEVICE);
+
+ if (dc->user_creatable) {
+ readline_add_completion_of(rs, str,
+ object_class_get_name(OBJECT_CLASS(dc)));
+ }
+ elt = elt->next;
+ }
+ g_slist_free(list);
+}
+
+static int qdev_add_hotpluggable_device(Object *obj, void *opaque)
+{
+ GSList **list = opaque;
+ DeviceState *dev = (DeviceState *)object_dynamic_cast(obj, TYPE_DEVICE);
+
+ if (dev == NULL) {
+ return 0;
+ }
+
+ if (dev->realized && object_property_get_bool(obj, "hotpluggable", NULL)) {
+ *list = g_slist_append(*list, dev);
+ }
+
+ return 0;
+}
+
+static GSList *qdev_build_hotpluggable_device_list(Object *peripheral)
+{
+ GSList *list = NULL;
+
+ object_child_foreach(peripheral, qdev_add_hotpluggable_device, &list);
+
+ return list;
+}
+
+static void peripheral_device_del_completion(ReadLineState *rs,
+ const char *str)
+{
+ Object *peripheral = container_get(qdev_get_machine(), "/peripheral");
+ GSList *list, *item;
+
+ list = qdev_build_hotpluggable_device_list(peripheral);
+ if (!list) {
+ return;
+ }
+
+ for (item = list; item; item = g_slist_next(item)) {
+ DeviceState *dev = item->data;
+
+ if (dev->id) {
+ readline_add_completion_of(rs, str, dev->id);
+ }
+ }
+
+ g_slist_free(list);
+}
+
+void device_del_completion(ReadLineState *rs, int nb_args, const char *str)
+{
+ if (nb_args != 2) {
+ return;
+ }
+
+ readline_set_completion_index(rs, strlen(str));
+ peripheral_device_del_completion(rs, str);
+}
+
+BlockBackend *blk_by_qdev_id(const char *id, Error **errp)
+{
+ DeviceState *dev;
+ BlockBackend *blk;
+
+ GLOBAL_STATE_CODE();
+
+ dev = find_device_state(id, errp);
+ if (dev == NULL) {
+ return NULL;
+ }
+
+ blk = blk_by_dev(dev);
+ if (!blk) {
+ error_setg(errp, "Device does not have a block device backend");
+ }
+ return blk;
+}
+
+QemuOptsList qemu_device_opts = {
+ .name = "device",
+ .implied_opt_name = "driver",
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_device_opts.head),
+ .desc = {
+ /*
+ * no elements => accept any
+ * sanity checking will happen later
+ * when setting device properties
+ */
+ { /* end of list */ }
+ },
+};
+
+QemuOptsList qemu_global_opts = {
+ .name = "global",
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_global_opts.head),
+ .desc = {
+ {
+ .name = "driver",
+ .type = QEMU_OPT_STRING,
+ },{
+ .name = "property",
+ .type = QEMU_OPT_STRING,
+ },{
+ .name = "value",
+ .type = QEMU_OPT_STRING,
+ },
+ { /* end of list */ }
+ },
+};
+
+int qemu_global_option(const char *str)
+{
+ char driver[64], property[64];
+ QemuOpts *opts;
+ int rc, offset;
+
+ rc = sscanf(str, "%63[^.=].%63[^=]%n", driver, property, &offset);
+ if (rc == 2 && str[offset] == '=') {
+ opts = qemu_opts_create(&qemu_global_opts, NULL, 0, &error_abort);
+ qemu_opt_set(opts, "driver", driver, &error_abort);
+ qemu_opt_set(opts, "property", property, &error_abort);
+ qemu_opt_set(opts, "value", str + offset + 1, &error_abort);
+ return 0;
+ }
+
+ opts = qemu_opts_parse_noisily(&qemu_global_opts, str, false);
+ if (!opts) {
+ return -1;
+ }
+ if (!qemu_opt_get(opts, "driver")
+ || !qemu_opt_get(opts, "property")
+ || !qemu_opt_get(opts, "value")) {
+ error_report("options 'driver', 'property', and 'value'"
+ " are required");
+ return -1;
+ }
+
+ return 0;
+}
+
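+/*
+ * Before the machine is ready (PHASE_MACHINE_READY), only commands
+ * explicitly flagged with QCO_ALLOW_PRECONFIG are allowed.
+ */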
+bool qmp_command_available(const QmpCommand *cmd, Error **errp)
+{
+ if (!phase_check(PHASE_MACHINE_READY) &&
+ !(cmd->options & QCO_ALLOW_PRECONFIG)) {
+ error_setg(errp, "The command '%s' is permitted only after machine initialization has completed",
+ cmd->name);
+ return false;
+ }
+ return true;
+}
diff --git a/system/qemu-seccomp.c b/system/qemu-seccomp.c
new file mode 100644
index 0000000..4d7439e
--- /dev/null
+++ b/system/qemu-seccomp.c
@@ -0,0 +1,486 @@
+/*
+ * QEMU seccomp mode 2 support with libseccomp
+ *
+ * Copyright IBM, Corp. 2012
+ *
+ * Authors:
+ * Eduardo Otubo <eotubo@br.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/config-file.h"
+#include "qemu/option.h"
+#include "qemu/module.h"
+#include <sys/prctl.h>
+#include <seccomp.h>
+#include "sysemu/seccomp.h"
+#include <linux/seccomp.h>
+
+/* For some architectures (notably ARM) cacheflush is not supported until
+ * libseccomp 2.2.3, but configure enforces that we are using a more recent
+ * version on those hosts, so it is OK for this check to be less strict.
+ */
+#if SCMP_VER_MAJOR >= 3
+ #define HAVE_CACHEFLUSH
+#elif SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR >= 2
+ #define HAVE_CACHEFLUSH
+#endif
+
+struct QemuSeccompSyscall {
+ int32_t num;
+ uint8_t set;
+ uint8_t narg;
+ const struct scmp_arg_cmp *arg_cmp;
+ uint32_t action;
+};
+
+const struct scmp_arg_cmp sched_setscheduler_arg[] = {
+ /* was SCMP_A1(SCMP_CMP_NE, SCHED_IDLE), but expanded due to GCC 4.x bug */
+ { .arg = 1, .op = SCMP_CMP_NE, .datum_a = SCHED_IDLE }
+};
+
+/*
+ * See 'NOTES' in 'man 2 clone' - s390 & cris have 'flags' in a
+ * different position than other architectures
+ */
+#if defined(HOST_S390X) || defined(HOST_S390) || defined(HOST_CRIS)
+#define CLONE_FLAGS_ARG 1
+#else
+#define CLONE_FLAGS_ARG 0
+#endif
+
+#ifndef CLONE_PIDFD
+# define CLONE_PIDFD 0x00001000
+#endif
+
+#define REQUIRE_CLONE_FLAG(flag) \
+ const struct scmp_arg_cmp clone_arg ## flag[] = { \
+ { .arg = CLONE_FLAGS_ARG, \
+ .op = SCMP_CMP_MASKED_EQ, \
+ .datum_a = flag, .datum_b = 0 } }
+
+#define FORBID_CLONE_FLAG(flag) \
+ const struct scmp_arg_cmp clone_arg ## flag[] = { \
+ { .arg = CLONE_FLAGS_ARG, \
+ .op = SCMP_CMP_MASKED_EQ, \
+ .datum_a = flag, .datum_b = flag } }
+
+#define RULE_CLONE_FLAG(flag) \
+ { SCMP_SYS(clone), QEMU_SECCOMP_SET_SPAWN, \
+ ARRAY_SIZE(clone_arg ## flag), clone_arg ## flag, SCMP_ACT_TRAP }
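+
+/*
+ * In other words: REQUIRE_CLONE_FLAG(X) builds a comparison matching
+ * clone() calls with X *clear* ((arg & X) == 0), FORBID_CLONE_FLAG(X)
+ * one matching calls with X *set* ((arg & X) == X), and RULE_CLONE_FLAG()
+ * traps whatever matches. A clone() call therefore only gets through if
+ * it sets every required thread-creation flag and none of the forbidden
+ * ones.
+ */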
+
+/* If no CLONE_* flags other than CSIGNAL are set, deny */
+const struct scmp_arg_cmp clone_arg_none[] = {
+ { .arg = CLONE_FLAGS_ARG,
+ .op = SCMP_CMP_MASKED_EQ,
+ .datum_a = ~(CSIGNAL), .datum_b = 0 }
+};
+
+/*
+ * pthread_create should always set all of these.
+ */
+REQUIRE_CLONE_FLAG(CLONE_VM);
+REQUIRE_CLONE_FLAG(CLONE_FS);
+REQUIRE_CLONE_FLAG(CLONE_FILES);
+REQUIRE_CLONE_FLAG(CLONE_SIGHAND);
+REQUIRE_CLONE_FLAG(CLONE_THREAD);
+REQUIRE_CLONE_FLAG(CLONE_SYSVSEM);
+REQUIRE_CLONE_FLAG(CLONE_SETTLS);
+REQUIRE_CLONE_FLAG(CLONE_PARENT_SETTID);
+REQUIRE_CLONE_FLAG(CLONE_CHILD_CLEARTID);
+/*
+ * Musl sets this in pthread_create too, but it is
+ * obsolete and harmless since its behaviour is
+ * subsumed under CLONE_THREAD
+ */
+/*REQUIRE_CLONE_FLAG(CLONE_DETACHED);*/
+
+
+/*
+ * These all indicate an attempt to spawn a process
+ * instead of a thread, or other undesirable scenarios
+ */
+FORBID_CLONE_FLAG(CLONE_PIDFD);
+FORBID_CLONE_FLAG(CLONE_PTRACE);
+FORBID_CLONE_FLAG(CLONE_VFORK);
+FORBID_CLONE_FLAG(CLONE_PARENT);
+FORBID_CLONE_FLAG(CLONE_NEWNS);
+FORBID_CLONE_FLAG(CLONE_UNTRACED);
+FORBID_CLONE_FLAG(CLONE_NEWCGROUP);
+FORBID_CLONE_FLAG(CLONE_NEWUTS);
+FORBID_CLONE_FLAG(CLONE_NEWIPC);
+FORBID_CLONE_FLAG(CLONE_NEWUSER);
+FORBID_CLONE_FLAG(CLONE_NEWPID);
+FORBID_CLONE_FLAG(CLONE_NEWNET);
+FORBID_CLONE_FLAG(CLONE_IO);
+
+
+static const struct QemuSeccompSyscall denylist[] = {
+ /* default set of syscalls that should get blocked */
+ { SCMP_SYS(reboot), QEMU_SECCOMP_SET_DEFAULT,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(swapon), QEMU_SECCOMP_SET_DEFAULT,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(swapoff), QEMU_SECCOMP_SET_DEFAULT,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(syslog), QEMU_SECCOMP_SET_DEFAULT,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(mount), QEMU_SECCOMP_SET_DEFAULT,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(umount), QEMU_SECCOMP_SET_DEFAULT,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(kexec_load), QEMU_SECCOMP_SET_DEFAULT,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(afs_syscall), QEMU_SECCOMP_SET_DEFAULT,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(break), QEMU_SECCOMP_SET_DEFAULT,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(ftime), QEMU_SECCOMP_SET_DEFAULT,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(getpmsg), QEMU_SECCOMP_SET_DEFAULT,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(gtty), QEMU_SECCOMP_SET_DEFAULT,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(lock), QEMU_SECCOMP_SET_DEFAULT,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(mpx), QEMU_SECCOMP_SET_DEFAULT,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(prof), QEMU_SECCOMP_SET_DEFAULT,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(profil), QEMU_SECCOMP_SET_DEFAULT,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(putpmsg), QEMU_SECCOMP_SET_DEFAULT,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(security), QEMU_SECCOMP_SET_DEFAULT,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(stty), QEMU_SECCOMP_SET_DEFAULT,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(tuxcall), QEMU_SECCOMP_SET_DEFAULT,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(ulimit), QEMU_SECCOMP_SET_DEFAULT,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(vserver), QEMU_SECCOMP_SET_DEFAULT,
+ 0, NULL, SCMP_ACT_TRAP },
+ /* obsolete */
+ { SCMP_SYS(readdir), QEMU_SECCOMP_SET_OBSOLETE,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(_sysctl), QEMU_SECCOMP_SET_OBSOLETE,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(bdflush), QEMU_SECCOMP_SET_OBSOLETE,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(create_module), QEMU_SECCOMP_SET_OBSOLETE,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(get_kernel_syms), QEMU_SECCOMP_SET_OBSOLETE,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(query_module), QEMU_SECCOMP_SET_OBSOLETE,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(sgetmask), QEMU_SECCOMP_SET_OBSOLETE,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(ssetmask), QEMU_SECCOMP_SET_OBSOLETE,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(sysfs), QEMU_SECCOMP_SET_OBSOLETE,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(uselib), QEMU_SECCOMP_SET_OBSOLETE,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(ustat), QEMU_SECCOMP_SET_OBSOLETE,
+ 0, NULL, SCMP_ACT_TRAP },
+ /* privileged */
+ { SCMP_SYS(setuid), QEMU_SECCOMP_SET_PRIVILEGED,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(setgid), QEMU_SECCOMP_SET_PRIVILEGED,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(setpgid), QEMU_SECCOMP_SET_PRIVILEGED,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(setsid), QEMU_SECCOMP_SET_PRIVILEGED,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(setreuid), QEMU_SECCOMP_SET_PRIVILEGED,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(setregid), QEMU_SECCOMP_SET_PRIVILEGED,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(setresuid), QEMU_SECCOMP_SET_PRIVILEGED,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(setresgid), QEMU_SECCOMP_SET_PRIVILEGED,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(setfsuid), QEMU_SECCOMP_SET_PRIVILEGED,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(setfsgid), QEMU_SECCOMP_SET_PRIVILEGED,
+ 0, NULL, SCMP_ACT_TRAP },
+ /* spawn */
+ { SCMP_SYS(fork), QEMU_SECCOMP_SET_SPAWN,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(vfork), QEMU_SECCOMP_SET_SPAWN,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(execve), QEMU_SECCOMP_SET_SPAWN,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(clone), QEMU_SECCOMP_SET_SPAWN,
+ ARRAY_SIZE(clone_arg_none), clone_arg_none, SCMP_ACT_TRAP },
+ RULE_CLONE_FLAG(CLONE_VM),
+ RULE_CLONE_FLAG(CLONE_FS),
+ RULE_CLONE_FLAG(CLONE_FILES),
+ RULE_CLONE_FLAG(CLONE_SIGHAND),
+ RULE_CLONE_FLAG(CLONE_THREAD),
+ RULE_CLONE_FLAG(CLONE_SYSVSEM),
+ RULE_CLONE_FLAG(CLONE_SETTLS),
+ RULE_CLONE_FLAG(CLONE_PARENT_SETTID),
+ RULE_CLONE_FLAG(CLONE_CHILD_CLEARTID),
+ /*RULE_CLONE_FLAG(CLONE_DETACHED),*/
+ RULE_CLONE_FLAG(CLONE_PIDFD),
+ RULE_CLONE_FLAG(CLONE_PTRACE),
+ RULE_CLONE_FLAG(CLONE_VFORK),
+ RULE_CLONE_FLAG(CLONE_PARENT),
+ RULE_CLONE_FLAG(CLONE_NEWNS),
+ RULE_CLONE_FLAG(CLONE_UNTRACED),
+ RULE_CLONE_FLAG(CLONE_NEWCGROUP),
+ RULE_CLONE_FLAG(CLONE_NEWUTS),
+ RULE_CLONE_FLAG(CLONE_NEWIPC),
+ RULE_CLONE_FLAG(CLONE_NEWUSER),
+ RULE_CLONE_FLAG(CLONE_NEWPID),
+ RULE_CLONE_FLAG(CLONE_NEWNET),
+ RULE_CLONE_FLAG(CLONE_IO),
+#ifdef __SNR_clone3
+ { SCMP_SYS(clone3), QEMU_SECCOMP_SET_SPAWN,
+ 0, NULL, SCMP_ACT_ERRNO(ENOSYS) },
+#endif
+#ifdef __SNR_execveat
+ { SCMP_SYS(execveat), QEMU_SECCOMP_SET_SPAWN,
+ 0, NULL, SCMP_ACT_TRAP },
+#endif
+ { SCMP_SYS(setns), QEMU_SECCOMP_SET_SPAWN,
+ 0, NULL, SCMP_ACT_TRAP },
+ { SCMP_SYS(unshare), QEMU_SECCOMP_SET_SPAWN,
+ 0, NULL, SCMP_ACT_TRAP },
+ /* resource control */
+ { SCMP_SYS(setpriority), QEMU_SECCOMP_SET_RESOURCECTL,
+ 0, NULL, SCMP_ACT_ERRNO(EPERM) },
+ { SCMP_SYS(sched_setparam), QEMU_SECCOMP_SET_RESOURCECTL,
+ 0, NULL, SCMP_ACT_ERRNO(EPERM) },
+ { SCMP_SYS(sched_setscheduler), QEMU_SECCOMP_SET_RESOURCECTL,
+ ARRAY_SIZE(sched_setscheduler_arg), sched_setscheduler_arg,
+ SCMP_ACT_ERRNO(EPERM) },
+ { SCMP_SYS(sched_setaffinity), QEMU_SECCOMP_SET_RESOURCECTL,
+ 0, NULL, SCMP_ACT_ERRNO(EPERM) },
+};
+
+static inline __attribute__((unused)) int
+qemu_seccomp(unsigned int operation, unsigned int flags, void *args)
+{
+#ifdef __NR_seccomp
+ return syscall(__NR_seccomp, operation, flags, args);
+#else
+ errno = ENOSYS;
+ return -1;
+#endif
+}
+
+static uint32_t qemu_seccomp_update_action(uint32_t action)
+{
+#if defined(SECCOMP_GET_ACTION_AVAIL) && defined(SCMP_ACT_KILL_PROCESS) && \
+ defined(SECCOMP_RET_KILL_PROCESS)
+ if (action == SCMP_ACT_TRAP) {
+ static int kill_process = -1;
+ if (kill_process == -1) {
+ uint32_t testaction = SECCOMP_RET_KILL_PROCESS;
+
+ if (qemu_seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &testaction) == 0) {
+ kill_process = 1;
+ } else {
+ kill_process = 0;
+ }
+ }
+ if (kill_process == 1) {
+ return SCMP_ACT_KILL_PROCESS;
+ }
+ }
+#endif
+ return action;
+}
+
+
+static int seccomp_start(uint32_t seccomp_opts, Error **errp)
+{
+ int rc = -1;
+ unsigned int i = 0;
+ scmp_filter_ctx ctx;
+
+ ctx = seccomp_init(SCMP_ACT_ALLOW);
+ if (ctx == NULL) {
+ error_setg(errp, "failed to initialize seccomp context");
+ goto seccomp_return;
+ }
+
+#if defined(CONFIG_SECCOMP_SYSRAWRC)
+ /*
+ * This must be the first seccomp_attr_set() call to have full
+ * error propagation from subsequent seccomp APIs.
+ */
+ rc = seccomp_attr_set(ctx, SCMP_FLTATR_API_SYSRAWRC, 1);
+ if (rc != 0) {
+ error_setg_errno(errp, -rc,
+ "failed to set seccomp rawrc attribute");
+ goto seccomp_return;
+ }
+#endif
+
+ rc = seccomp_attr_set(ctx, SCMP_FLTATR_CTL_TSYNC, 1);
+ if (rc != 0) {
+ error_setg_errno(errp, -rc,
+ "failed to set seccomp thread synchronization");
+ goto seccomp_return;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(denylist); i++) {
+ uint32_t action;
+ if (!(seccomp_opts & denylist[i].set)) {
+ continue;
+ }
+
+ action = qemu_seccomp_update_action(denylist[i].action);
+ rc = seccomp_rule_add_array(ctx, action, denylist[i].num,
+ denylist[i].narg, denylist[i].arg_cmp);
+ if (rc < 0) {
+ error_setg_errno(errp, -rc,
+ "failed to add seccomp denylist rules");
+ goto seccomp_return;
+ }
+ }
+
+ rc = seccomp_load(ctx);
+ if (rc < 0) {
+ error_setg_errno(errp, -rc,
+ "failed to load seccomp syscall filter in kernel");
+ }
+
+ seccomp_return:
+ seccomp_release(ctx);
+ return rc < 0 ? -1 : 0;
+}
+
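+/*
+ * Parse the -sandbox option. A fully locked-down invocation looks
+ * something like (illustrative):
+ *
+ * -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,
+ * resourcecontrol=deny
+ */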
+int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp)
+{
+ if (qemu_opt_get_bool(opts, "enable", false)) {
+ uint32_t seccomp_opts = QEMU_SECCOMP_SET_DEFAULT
+ | QEMU_SECCOMP_SET_OBSOLETE;
+ const char *value = NULL;
+
+ value = qemu_opt_get(opts, "obsolete");
+ if (value) {
+ if (g_str_equal(value, "allow")) {
+ seccomp_opts &= ~QEMU_SECCOMP_SET_OBSOLETE;
+ } else if (g_str_equal(value, "deny")) {
+ /* "deny" is the default; this branch only exists to
+ * keep the command line handling consistent */
+ } else {
+ error_setg(errp, "invalid argument for obsolete");
+ return -1;
+ }
+ }
+
+ value = qemu_opt_get(opts, "elevateprivileges");
+ if (value) {
+ if (g_str_equal(value, "deny")) {
+ seccomp_opts |= QEMU_SECCOMP_SET_PRIVILEGED;
+ } else if (g_str_equal(value, "children")) {
+ seccomp_opts |= QEMU_SECCOMP_SET_PRIVILEGED;
+
+ /* calling prctl directly because we're not
+ * sure if the host has CAP_SYS_ADMIN set */
+ if (prctl(PR_SET_NO_NEW_PRIVS, 1)) {
+ error_setg(errp, "failed to set no_new_privs, aborting");
+ return -1;
+ }
+ } else if (g_str_equal(value, "allow")) {
+ /* default value */
+ } else {
+ error_setg(errp, "invalid argument for elevateprivileges");
+ return -1;
+ }
+ }
+
+ value = qemu_opt_get(opts, "spawn");
+ if (value) {
+ if (g_str_equal(value, "deny")) {
+ seccomp_opts |= QEMU_SECCOMP_SET_SPAWN;
+ } else if (g_str_equal(value, "allow")) {
+ /* default value */
+ } else {
+ error_setg(errp, "invalid argument for spawn");
+ return -1;
+ }
+ }
+
+ value = qemu_opt_get(opts, "resourcecontrol");
+ if (value) {
+ if (g_str_equal(value, "deny")) {
+ seccomp_opts |= QEMU_SECCOMP_SET_RESOURCECTL;
+ } else if (g_str_equal(value, "allow")) {
+ /* default value */
+ } else {
+ error_setg(errp, "invalid argument for resourcecontrol");
+ return -1;
+ }
+ }
+
+ if (seccomp_start(seccomp_opts, errp) < 0) {
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static QemuOptsList qemu_sandbox_opts = {
+ .name = "sandbox",
+ .implied_opt_name = "enable",
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_sandbox_opts.head),
+ .desc = {
+ {
+ .name = "enable",
+ .type = QEMU_OPT_BOOL,
+ },
+ {
+ .name = "obsolete",
+ .type = QEMU_OPT_STRING,
+ },
+ {
+ .name = "elevateprivileges",
+ .type = QEMU_OPT_STRING,
+ },
+ {
+ .name = "spawn",
+ .type = QEMU_OPT_STRING,
+ },
+ {
+ .name = "resourcecontrol",
+ .type = QEMU_OPT_STRING,
+ },
+ { /* end of list */ }
+ },
+};
+
+static void seccomp_register(void)
+{
+ bool add = false;
+
+ /* FIXME: use seccomp_api_get() >= 2 check when released */
+
+#if defined(SECCOMP_FILTER_FLAG_TSYNC)
+ int check;
+
+ /* Probe host TSYNC support: a kernel that recognizes the flag gets as
+ * far as reading the (NULL) args and fails with EFAULT; ENOSYS or
+ * EINVAL indicate the flag is unavailable. */
+ check = qemu_seccomp(SECCOMP_SET_MODE_FILTER,
+ SECCOMP_FILTER_FLAG_TSYNC, NULL);
+ if (check < 0 && errno == EFAULT) {
+ add = true;
+ }
+#endif
+
+ if (add) {
+ qemu_add_opts(&qemu_sandbox_opts);
+ }
+}
+opts_init(seccomp_register);
diff --git a/system/qtest.c b/system/qtest.c
new file mode 100644
index 0000000..35b643a
--- /dev/null
+++ b/system/qtest.c
@@ -0,0 +1,1070 @@
+/*
+ * Test Server
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "sysemu/qtest.h"
+#include "sysemu/runstate.h"
+#include "chardev/char-fe.h"
+#include "exec/ioport.h"
+#include "exec/memory.h"
+#include "exec/tswap.h"
+#include "hw/qdev-core.h"
+#include "hw/irq.h"
+#include "qemu/accel.h"
+#include "sysemu/cpu-timers.h"
+#include "qemu/config-file.h"
+#include "qemu/option.h"
+#include "qemu/error-report.h"
+#include "qemu/module.h"
+#include "qemu/cutils.h"
+#include "qom/object_interfaces.h"
+
+#define MAX_IRQ 256
+
+#define TYPE_QTEST "qtest"
+
+OBJECT_DECLARE_SIMPLE_TYPE(QTest, QTEST)
+
+struct QTest {
+ Object parent;
+
+ bool has_machine_link;
+ char *chr_name;
+ Chardev *chr;
+ CharBackend qtest_chr;
+ char *log;
+};
+
+bool qtest_allowed;
+
+static DeviceState *irq_intercept_dev;
+static FILE *qtest_log_fp;
+static QTest *qtest;
+static GString *inbuf;
+static int irq_levels[MAX_IRQ];
+static GTimer *timer;
+static bool qtest_opened;
+static void (*qtest_server_send)(void*, const char*);
+static void *qtest_server_send_opaque;
+
+#define FMT_timeval "%.06f"
+
+/**
+ * DOC: QTest Protocol
+ *
+ * Line-based, request/response protocol. The server can send asynchronous
+ * messages, so clients should be prepared to handle any number of async
+ * messages before the response arrives.
+ *
+ * Valid requests
+ * ^^^^^^^^^^^^^^
+ *
+ * Clock management:
+ * """""""""""""""""
+ *
+ * The qtest client is completely in charge of the QEMU_CLOCK_VIRTUAL.
+ * qtest commands let you adjust the value of the clock (monotonically).
+ * All the commands return the current value of the clock in nanoseconds.
+ *
+ * .. code-block:: none
+ *
+ * > clock_step
+ * < OK VALUE
+ *
+ * Advance the clock to the next deadline. Useful when waiting for
+ * asynchronous events.
+ *
+ * .. code-block:: none
+ *
+ * > clock_step NS
+ * < OK VALUE
+ *
+ * Advance the clock by NS nanoseconds.
+ *
+ * .. code-block:: none
+ *
+ * > clock_set NS
+ * < OK VALUE
+ *
+ * Advance the clock to NS nanoseconds (do nothing if it's already past).
+ *
+ * PIO and memory access:
+ * """"""""""""""""""""""
+ *
+ * .. code-block:: none
+ *
+ * > outb ADDR VALUE
+ * < OK
+ *
+ * .. code-block:: none
+ *
+ * > outw ADDR VALUE
+ * < OK
+ *
+ * .. code-block:: none
+ *
+ * > outl ADDR VALUE
+ * < OK
+ *
+ * .. code-block:: none
+ *
+ * > inb ADDR
+ * < OK VALUE
+ *
+ * .. code-block:: none
+ *
+ * > inw ADDR
+ * < OK VALUE
+ *
+ * .. code-block:: none
+ *
+ * > inl ADDR
+ * < OK VALUE
+ *
+ * .. code-block:: none
+ *
+ * > writeb ADDR VALUE
+ * < OK
+ *
+ * .. code-block:: none
+ *
+ * > writew ADDR VALUE
+ * < OK
+ *
+ * .. code-block:: none
+ *
+ * > writel ADDR VALUE
+ * < OK
+ *
+ * .. code-block:: none
+ *
+ * > writeq ADDR VALUE
+ * < OK
+ *
+ * .. code-block:: none
+ *
+ * > readb ADDR
+ * < OK VALUE
+ *
+ * .. code-block:: none
+ *
+ * > readw ADDR
+ * < OK VALUE
+ *
+ * .. code-block:: none
+ *
+ * > readl ADDR
+ * < OK VALUE
+ *
+ * .. code-block:: none
+ *
+ * > readq ADDR
+ * < OK VALUE
+ *
+ * .. code-block:: none
+ *
+ * > read ADDR SIZE
+ * < OK DATA
+ *
+ * .. code-block:: none
+ *
+ * > write ADDR SIZE DATA
+ * < OK
+ *
+ * .. code-block:: none
+ *
+ * > b64read ADDR SIZE
+ * < OK B64_DATA
+ *
+ * .. code-block:: none
+ *
+ * > b64write ADDR SIZE B64_DATA
+ * < OK
+ *
+ * .. code-block:: none
+ *
+ * > memset ADDR SIZE VALUE
+ * < OK
+ *
+ * ADDR, SIZE, VALUE are all integers parsed with strtoul() with a base of 0.
+ * For 'memset' a zero size is permitted and does nothing.
+ *
+ * DATA is an arbitrarily long hex number prefixed with '0x'. If it's smaller
+ * than the expected size, the value will be zero filled at the end of the data
+ * sequence.
+ *
+ * B64_DATA is an arbitrarily long base64 encoded string.
+ * If the sizes do not match, the data will be truncated.
+ *
+ * IRQ management:
+ * """""""""""""""
+ *
+ * .. code-block:: none
+ *
+ * > irq_intercept_in QOM-PATH
+ * < OK
+ *
+ * .. code-block:: none
+ *
+ * > irq_intercept_out QOM-PATH
+ * < OK
+ *
+ * Attach to the gpio-in (resp. gpio-out) pins exported by the device at
+ * QOM-PATH. When the pin is triggered, one of the following async messages
+ * will be printed to the qtest stream::
+ *
+ * IRQ raise NUM
+ * IRQ lower NUM
+ *
+ * where NUM is an IRQ number. For the PC, interrupts can be intercepted
+ * simply with "irq_intercept_in ioapic" (note that IRQ0 comes out with
+ * NUM=0 even though it is remapped to GSI 2).
+ *
+ * Setting interrupt level:
+ * """"""""""""""""""""""""
+ *
+ * .. code-block:: none
+ *
+ * > set_irq_in QOM-PATH NAME NUM LEVEL
+ * < OK
+ *
+ * where NAME is the name of the irq/gpio list, NUM is an IRQ number and
+ * LEVEL is a signed integer IRQ level.
+ *
+ * Forcibly set the given interrupt pin to the given level.
+ *
+ */
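+
+/*
+ * Example exchange (illustrative):
+ *
+ * > readb 0x0
+ * < OK 0x0000000000000000
+ * > clock_step 1000000
+ * < OK 1000000
+ */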
+
+static int hex2nib(char ch)
+{
+ if (ch >= '0' && ch <= '9') {
+ return ch - '0';
+ } else if (ch >= 'a' && ch <= 'f') {
+ return 10 + (ch - 'a');
+ } else if (ch >= 'A' && ch <= 'F') {
+ return 10 + (ch - 'A');
+ } else {
+ return -1;
+ }
+}
+
+void qtest_send_prefix(CharBackend *chr)
+{
+ if (!qtest_log_fp || !qtest_opened) {
+ return;
+ }
+
+ fprintf(qtest_log_fp, "[S +" FMT_timeval "] ", g_timer_elapsed(timer, NULL));
+}
+
+static void G_GNUC_PRINTF(1, 2) qtest_log_send(const char *fmt, ...)
+{
+ va_list ap;
+
+ if (!qtest_log_fp || !qtest_opened) {
+ return;
+ }
+
+ qtest_send_prefix(NULL);
+
+ va_start(ap, fmt);
+ vfprintf(qtest_log_fp, fmt, ap);
+ va_end(ap);
+}
+
+static void qtest_server_char_be_send(void *opaque, const char *str)
+{
+ size_t len = strlen(str);
+ CharBackend *chr = (CharBackend *)opaque;
+ qemu_chr_fe_write_all(chr, (uint8_t *)str, len);
+ if (qtest_log_fp && qtest_opened) {
+ fprintf(qtest_log_fp, "%s", str);
+ }
+}
+
+static void qtest_send(CharBackend *chr, const char *str)
+{
+ qtest_server_send(qtest_server_send_opaque, str);
+}
+
+void qtest_sendf(CharBackend *chr, const char *fmt, ...)
+{
+ va_list ap;
+ gchar *buffer;
+
+ va_start(ap, fmt);
+ buffer = g_strdup_vprintf(fmt, ap);
+ qtest_send(chr, buffer);
+ g_free(buffer);
+ va_end(ap);
+}
+
+static void qtest_irq_handler(void *opaque, int n, int level)
+{
+ qemu_irq old_irq = *(qemu_irq *)opaque;
+ qemu_set_irq(old_irq, level);
+
+ if (irq_levels[n] != level) {
+ CharBackend *chr = &qtest->qtest_chr;
+ irq_levels[n] = level;
+ qtest_send_prefix(chr);
+ qtest_sendf(chr, "IRQ %s %d\n",
+ level ? "raise" : "lower", n);
+ }
+}
+
+static int64_t qtest_clock_counter;
+
+int64_t qtest_get_virtual_clock(void)
+{
+ return qatomic_read_i64(&qtest_clock_counter);
+}
+
+static void qtest_set_virtual_clock(int64_t count)
+{
+ qatomic_set_i64(&qtest_clock_counter, count);
+}
+
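+/*
+ * Advance QEMU_CLOCK_VIRTUAL to @dest, stopping at each intermediate
+ * timer deadline so the corresponding callbacks run with the clock
+ * value they were scheduled for.
+ */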
+static void qtest_clock_warp(int64_t dest)
+{
+ int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+ AioContext *aio_context;
+ assert(qtest_enabled());
+ aio_context = qemu_get_aio_context();
+ while (clock < dest) {
+ int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
+ QEMU_TIMER_ATTR_ALL);
+ int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
+
+ qtest_set_virtual_clock(qtest_get_virtual_clock() + warp);
+
+ qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
+ timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
+ clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+ }
+ qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
+}
+
+static bool (*process_command_cb)(CharBackend *chr, gchar **words);
+
+void qtest_set_command_cb(bool (*pc_cb)(CharBackend *chr, gchar **words))
+{
+ assert(!process_command_cb); /* Switch to a list if we need more than one */
+
+ process_command_cb = pc_cb;
+}
+
+static void qtest_install_gpio_out_intercept(DeviceState *dev, const char *name, int n)
+{
+ qemu_irq *disconnected = g_new0(qemu_irq, 1);
+ qemu_irq icpt = qemu_allocate_irq(qtest_irq_handler,
+ disconnected, n);
+
+ *disconnected = qdev_intercept_gpio_out(dev, icpt, name, n);
+}
+
+static void qtest_process_command(CharBackend *chr, gchar **words)
+{
+ const gchar *command;
+
+ g_assert(words);
+
+ command = words[0];
+
+ if (qtest_log_fp) {
+ int i;
+
+ fprintf(qtest_log_fp, "[R +" FMT_timeval "]", g_timer_elapsed(timer, NULL));
+ for (i = 0; words[i]; i++) {
+ fprintf(qtest_log_fp, " %s", words[i]);
+ }
+ fprintf(qtest_log_fp, "\n");
+ }
+
+ g_assert(command);
+ if (strcmp(words[0], "irq_intercept_out") == 0
+ || strcmp(words[0], "irq_intercept_in") == 0) {
+ DeviceState *dev;
+ NamedGPIOList *ngl;
+ bool is_named;
+ bool is_outbound;
+ bool interception_succeeded = false;
+
+ g_assert(words[1]);
+ is_named = words[2] != NULL;
+ is_outbound = words[0][14] == 'o';
+ dev = DEVICE(object_resolve_path(words[1], NULL));
+ if (!dev) {
+ qtest_send_prefix(chr);
+ qtest_send(chr, "FAIL Unknown device\n");
+ return;
+ }
+
+ if (is_named && !is_outbound) {
+ qtest_send_prefix(chr);
+ qtest_send(chr, "FAIL Interception of named in-GPIOs not yet supported\n");
+ return;
+ }
+
+ if (irq_intercept_dev) {
+ qtest_send_prefix(chr);
+ if (irq_intercept_dev != dev) {
+ qtest_send(chr, "FAIL IRQ intercept already enabled\n");
+ } else {
+ qtest_send(chr, "OK\n");
+ }
+ return;
+ }
+
+ QLIST_FOREACH(ngl, &dev->gpios, node) {
+ /* We don't support inbound interception of named GPIOs yet */
+ if (is_outbound) {
+ /* NULL is valid and matchable, for "unnamed GPIO" */
+ if (g_strcmp0(ngl->name, words[2]) == 0) {
+ int i;
+ for (i = 0; i < ngl->num_out; ++i) {
+ qtest_install_gpio_out_intercept(dev, ngl->name, i);
+ }
+ interception_succeeded = true;
+ }
+ } else {
+ qemu_irq_intercept_in(ngl->in, qtest_irq_handler,
+ ngl->num_in);
+ interception_succeeded = true;
+ }
+ }
+
+ qtest_send_prefix(chr);
+ if (interception_succeeded) {
+ irq_intercept_dev = dev;
+ qtest_send(chr, "OK\n");
+ } else {
+ qtest_send(chr, "FAIL No intercepts installed\n");
+ }
+ } else if (strcmp(words[0], "set_irq_in") == 0) {
+ DeviceState *dev;
+ qemu_irq irq;
+ char *name;
+ int ret;
+ int num;
+ int level;
+
+ g_assert(words[1] && words[2] && words[3] && words[4]);
+
+ dev = DEVICE(object_resolve_path(words[1], NULL));
+ if (!dev) {
+ qtest_send_prefix(chr);
+ qtest_send(chr, "FAIL Unknown device\n");
+ return;
+ }
+
+ if (strcmp(words[2], "unnamed-gpio-in") == 0) {
+ name = NULL;
+ } else {
+ name = words[2];
+ }
+
+ ret = qemu_strtoi(words[3], NULL, 0, &num);
+ g_assert(!ret);
+ ret = qemu_strtoi(words[4], NULL, 0, &level);
+ g_assert(!ret);
+
+ irq = qdev_get_gpio_in_named(dev, name, num);
+
+ qemu_set_irq(irq, level);
+ qtest_send_prefix(chr);
+ qtest_send(chr, "OK\n");
+ } else if (strcmp(words[0], "outb") == 0 ||
+ strcmp(words[0], "outw") == 0 ||
+ strcmp(words[0], "outl") == 0) {
+ unsigned long addr;
+ unsigned long value;
+ int ret;
+
+ g_assert(words[1] && words[2]);
+ ret = qemu_strtoul(words[1], NULL, 0, &addr);
+ g_assert(ret == 0);
+ ret = qemu_strtoul(words[2], NULL, 0, &value);
+ g_assert(ret == 0);
+ g_assert(addr <= 0xffff);
+
+ if (words[0][3] == 'b') {
+ cpu_outb(addr, value);
+ } else if (words[0][3] == 'w') {
+ cpu_outw(addr, value);
+ } else if (words[0][3] == 'l') {
+ cpu_outl(addr, value);
+ }
+ qtest_send_prefix(chr);
+ qtest_send(chr, "OK\n");
+ } else if (strcmp(words[0], "inb") == 0 ||
+ strcmp(words[0], "inw") == 0 ||
+ strcmp(words[0], "inl") == 0) {
+ unsigned long addr;
+ uint32_t value = -1U;
+ int ret;
+
+ g_assert(words[1]);
+ ret = qemu_strtoul(words[1], NULL, 0, &addr);
+ g_assert(ret == 0);
+ g_assert(addr <= 0xffff);
+
+ if (words[0][2] == 'b') {
+ value = cpu_inb(addr);
+ } else if (words[0][2] == 'w') {
+ value = cpu_inw(addr);
+ } else if (words[0][2] == 'l') {
+ value = cpu_inl(addr);
+ }
+ qtest_send_prefix(chr);
+ qtest_sendf(chr, "OK 0x%04x\n", value);
+ } else if (strcmp(words[0], "writeb") == 0 ||
+ strcmp(words[0], "writew") == 0 ||
+ strcmp(words[0], "writel") == 0 ||
+ strcmp(words[0], "writeq") == 0) {
+ uint64_t addr;
+ uint64_t value;
+ int ret;
+
+ g_assert(words[1] && words[2]);
+ ret = qemu_strtou64(words[1], NULL, 0, &addr);
+ g_assert(ret == 0);
+ ret = qemu_strtou64(words[2], NULL, 0, &value);
+ g_assert(ret == 0);
+
+ if (words[0][5] == 'b') {
+ uint8_t data = value;
+ address_space_write(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED,
+ &data, 1);
+ } else if (words[0][5] == 'w') {
+ uint16_t data = value;
+ tswap16s(&data);
+ address_space_write(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED,
+ &data, 2);
+ } else if (words[0][5] == 'l') {
+ uint32_t data = value;
+ tswap32s(&data);
+ address_space_write(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED,
+ &data, 4);
+ } else if (words[0][5] == 'q') {
+ uint64_t data = value;
+ tswap64s(&data);
+ address_space_write(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED,
+ &data, 8);
+ }
+ qtest_send_prefix(chr);
+ qtest_send(chr, "OK\n");
+ } else if (strcmp(words[0], "readb") == 0 ||
+ strcmp(words[0], "readw") == 0 ||
+ strcmp(words[0], "readl") == 0 ||
+ strcmp(words[0], "readq") == 0) {
+ uint64_t addr;
+ uint64_t value = UINT64_C(-1);
+ int ret;
+
+ g_assert(words[1]);
+ ret = qemu_strtou64(words[1], NULL, 0, &addr);
+ g_assert(ret == 0);
+
+ if (words[0][4] == 'b') {
+ uint8_t data;
+ address_space_read(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED,
+ &data, 1);
+ value = data;
+ } else if (words[0][4] == 'w') {
+ uint16_t data;
+ address_space_read(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED,
+ &data, 2);
+ value = tswap16(data);
+ } else if (words[0][4] == 'l') {
+ uint32_t data;
+ address_space_read(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED,
+ &data, 4);
+ value = tswap32(data);
+ } else if (words[0][4] == 'q') {
+ address_space_read(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED,
+ &value, 8);
+ tswap64s(&value);
+ }
+ qtest_send_prefix(chr);
+ qtest_sendf(chr, "OK 0x%016" PRIx64 "\n", value);
+ } else if (strcmp(words[0], "read") == 0) {
+ uint64_t addr, len, i;
+ uint8_t *data;
+ char *enc;
+ int ret;
+
+ g_assert(words[1] && words[2]);
+ ret = qemu_strtou64(words[1], NULL, 0, &addr);
+ g_assert(ret == 0);
+ ret = qemu_strtou64(words[2], NULL, 0, &len);
+ g_assert(ret == 0);
+ /* We'd send garbage to libqtest if len is 0 */
+ g_assert(len);
+
+ data = g_malloc(len);
+ address_space_read(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, data,
+ len);
+
+ enc = g_malloc(2 * len + 1);
+ for (i = 0; i < len; i++) {
+ sprintf(&enc[i * 2], "%02x", data[i]);
+ }
+
+ qtest_send_prefix(chr);
+ qtest_sendf(chr, "OK 0x%s\n", enc);
+
+ g_free(data);
+ g_free(enc);
+ } else if (strcmp(words[0], "b64read") == 0) {
+ uint64_t addr, len;
+ uint8_t *data;
+ gchar *b64_data;
+ int ret;
+
+ g_assert(words[1] && words[2]);
+ ret = qemu_strtou64(words[1], NULL, 0, &addr);
+ g_assert(ret == 0);
+ ret = qemu_strtou64(words[2], NULL, 0, &len);
+ g_assert(ret == 0);
+
+ data = g_malloc(len);
+ address_space_read(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, data,
+ len);
+ b64_data = g_base64_encode(data, len);
+ qtest_send_prefix(chr);
+ qtest_sendf(chr, "OK %s\n", b64_data);
+
+ g_free(data);
+ g_free(b64_data);
+ } else if (strcmp(words[0], "write") == 0) {
+ uint64_t addr, len, i;
+ uint8_t *data;
+ size_t data_len;
+ int ret;
+
+ g_assert(words[1] && words[2] && words[3]);
+ ret = qemu_strtou64(words[1], NULL, 0, &addr);
+ g_assert(ret == 0);
+ ret = qemu_strtou64(words[2], NULL, 0, &len);
+ g_assert(ret == 0);
+
+ data_len = strlen(words[3]);
+ if (data_len < 3) {
+ qtest_send(chr, "ERR invalid argument size\n");
+ return;
+ }
+
+ data = g_malloc(len);
+ for (i = 0; i < len; i++) {
+ if ((i * 2 + 4) <= data_len) {
+ data[i] = hex2nib(words[3][i * 2 + 2]) << 4;
+ data[i] |= hex2nib(words[3][i * 2 + 3]);
+ } else {
+ data[i] = 0;
+ }
+ }
+ address_space_write(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, data,
+ len);
+ g_free(data);
+
+ qtest_send_prefix(chr);
+ qtest_send(chr, "OK\n");
+ } else if (strcmp(words[0], "memset") == 0) {
+ uint64_t addr, len;
+ uint8_t *data;
+ unsigned long pattern;
+ int ret;
+
+ g_assert(words[1] && words[2] && words[3]);
+ ret = qemu_strtou64(words[1], NULL, 0, &addr);
+ g_assert(ret == 0);
+ ret = qemu_strtou64(words[2], NULL, 0, &len);
+ g_assert(ret == 0);
+ ret = qemu_strtoul(words[3], NULL, 0, &pattern);
+ g_assert(ret == 0);
+
+ if (len) {
+ data = g_malloc(len);
+ memset(data, pattern, len);
+ address_space_write(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED,
+ data, len);
+ g_free(data);
+ }
+
+ qtest_send_prefix(chr);
+ qtest_send(chr, "OK\n");
+ } else if (strcmp(words[0], "b64write") == 0) {
+ uint64_t addr, len;
+ uint8_t *data;
+ size_t data_len;
+ gsize out_len;
+ int ret;
+
+ g_assert(words[1] && words[2] && words[3]);
+ ret = qemu_strtou64(words[1], NULL, 0, &addr);
+ g_assert(ret == 0);
+ ret = qemu_strtou64(words[2], NULL, 0, &len);
+ g_assert(ret == 0);
+
+ data_len = strlen(words[3]);
+ if (data_len < 3) {
+ qtest_send(chr, "ERR invalid argument size\n");
+ return;
+ }
+
+ data = g_base64_decode_inplace(words[3], &out_len);
+ if (out_len != len) {
+ qtest_log_send("b64write: data length mismatch (told %"PRIu64", "
+ "found %zu)\n",
+ len, out_len);
+ out_len = MIN(out_len, len);
+ }
+
+ address_space_write(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, data,
+ len);
+
+ qtest_send_prefix(chr);
+ qtest_send(chr, "OK\n");
+ } else if (strcmp(words[0], "endianness") == 0) {
+ qtest_send_prefix(chr);
+ if (target_words_bigendian()) {
+ qtest_sendf(chr, "OK big\n");
+ } else {
+ qtest_sendf(chr, "OK little\n");
+ }
+ } else if (qtest_enabled() && strcmp(words[0], "clock_step") == 0) {
+ int64_t ns;
+
+ if (words[1]) {
+ int ret = qemu_strtoi64(words[1], NULL, 0, &ns);
+ g_assert(ret == 0);
+ } else {
+ ns = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
+ QEMU_TIMER_ATTR_ALL);
+ }
+ qtest_clock_warp(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + ns);
+ qtest_send_prefix(chr);
+ qtest_sendf(chr, "OK %"PRIi64"\n",
+ (int64_t)qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
+ } else if (strcmp(words[0], "module_load") == 0) {
+ Error *local_err = NULL;
+ int rv;
+ g_assert(words[1] && words[2]);
+
+ qtest_send_prefix(chr);
+ rv = module_load(words[1], words[2], &local_err);
+ if (rv > 0) {
+ qtest_sendf(chr, "OK\n");
+ } else {
+ if (rv < 0) {
+ error_report_err(local_err);
+ }
+ qtest_sendf(chr, "FAIL\n");
+ }
+ } else if (qtest_enabled() && strcmp(words[0], "clock_set") == 0) {
+ int64_t ns;
+ int ret;
+
+ g_assert(words[1]);
+ ret = qemu_strtoi64(words[1], NULL, 0, &ns);
+ g_assert(ret == 0);
+ qtest_clock_warp(ns);
+ qtest_send_prefix(chr);
+ qtest_sendf(chr, "OK %"PRIi64"\n",
+ (int64_t)qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
+ } else if (process_command_cb && process_command_cb(chr, words)) {
+ /* Command got consumed by the callback handler */
+ } else {
+ qtest_send_prefix(chr);
+ qtest_sendf(chr, "FAIL Unknown command '%s'\n", words[0]);
+ }
+}
+
+static void qtest_process_inbuf(CharBackend *chr, GString *inbuf)
+{
+ char *end;
+
+ while ((end = strchr(inbuf->str, '\n')) != NULL) {
+ size_t offset;
+ GString *cmd;
+ gchar **words;
+
+ offset = end - inbuf->str;
+
+ cmd = g_string_new_len(inbuf->str, offset);
+ g_string_erase(inbuf, 0, offset + 1);
+
+ words = g_strsplit(cmd->str, " ", 0);
+ qtest_process_command(chr, words);
+ g_strfreev(words);
+
+ g_string_free(cmd, TRUE);
+ }
+}
+
+static void qtest_read(void *opaque, const uint8_t *buf, int size)
+{
+ CharBackend *chr = opaque;
+
+ g_string_append_len(inbuf, (const gchar *)buf, size);
+ qtest_process_inbuf(chr, inbuf);
+}
+
+static int qtest_can_read(void *opaque)
+{
+ return 1024;
+}
+
+static void qtest_event(void *opaque, QEMUChrEvent event)
+{
+ int i;
+
+ switch (event) {
+ case CHR_EVENT_OPENED:
+ /*
+ * We used to call qemu_system_reset() here, hoping we could
+ * reuse the same process for multiple tests that way. That was
+ * never used, and it injected an extra reset even when unused,
+ * which can mess up tests, e.g. -boot once.
+ */
+ for (i = 0; i < ARRAY_SIZE(irq_levels); i++) {
+ irq_levels[i] = 0;
+ }
+
+ g_clear_pointer(&timer, g_timer_destroy);
+ timer = g_timer_new();
+ qtest_opened = true;
+ if (qtest_log_fp) {
+ fprintf(qtest_log_fp, "[I " FMT_timeval "] OPENED\n", g_timer_elapsed(timer, NULL));
+ }
+ break;
+ case CHR_EVENT_CLOSED:
+ qtest_opened = false;
+ if (qtest_log_fp) {
+ fprintf(qtest_log_fp, "[I +" FMT_timeval "] CLOSED\n", g_timer_elapsed(timer, NULL));
+ }
+ g_clear_pointer(&timer, g_timer_destroy);
+ break;
+ default:
+ break;
+ }
+}
+
+void qtest_server_init(const char *qtest_chrdev, const char *qtest_log, Error **errp)
+{
+ ERRP_GUARD();
+ Chardev *chr;
+ Object *qtest;
+
+ chr = qemu_chr_new("qtest", qtest_chrdev, NULL);
+ if (chr == NULL) {
+ error_setg(errp, "Failed to initialize device for qtest: \"%s\"",
+ qtest_chrdev);
+ return;
+ }
+
+ qtest = object_new(TYPE_QTEST);
+ object_property_set_str(qtest, "chardev", chr->label, &error_abort);
+ if (qtest_log) {
+ object_property_set_str(qtest, "log", qtest_log, &error_abort);
+ }
+ object_property_add_child(qdev_get_machine(), "qtest", qtest);
+ user_creatable_complete(USER_CREATABLE(qtest), errp);
+ if (*errp) {
+ object_unparent(qtest);
+ }
+ object_unref(OBJECT(chr));
+ object_unref(qtest);
+}
+
+static bool qtest_server_start(QTest *q, Error **errp)
+{
+ Chardev *chr = q->chr;
+ const char *qtest_log = q->log;
+
+ if (qtest_log) {
+ if (strcmp(qtest_log, "none") != 0) {
+ qtest_log_fp = fopen(qtest_log, "w+");
+ }
+ } else {
+ qtest_log_fp = stderr;
+ }
+
+ if (!qemu_chr_fe_init(&q->qtest_chr, chr, errp)) {
+ return false;
+ }
+ qemu_chr_fe_set_handlers(&q->qtest_chr, qtest_can_read, qtest_read,
+ qtest_event, NULL, &q->qtest_chr, NULL, true);
+ qemu_chr_fe_set_echo(&q->qtest_chr, true);
+
+ inbuf = g_string_new("");
+
+ if (!qtest_server_send) {
+ qtest_server_set_send_handler(qtest_server_char_be_send, &q->qtest_chr);
+ }
+ qtest = q;
+ return true;
+}
+
+void qtest_server_set_send_handler(void (*send)(void*, const char*),
+ void *opaque)
+{
+ qtest_server_send = send;
+ qtest_server_send_opaque = opaque;
+}
+
+bool qtest_driver(void)
+{
+ return qtest && qtest->qtest_chr.chr != NULL;
+}
+
+void qtest_server_inproc_recv(void *dummy, const char *buf)
+{
+ static GString *gstr;
+ if (!gstr) {
+ gstr = g_string_new(NULL);
+ }
+ g_string_append(gstr, buf);
+ if (gstr->str[gstr->len - 1] == '\n') {
+ qtest_process_inbuf(NULL, gstr);
+ g_string_truncate(gstr, 0);
+ }
+}
+
+static void qtest_complete(UserCreatable *uc, Error **errp)
+{
+ QTest *q = QTEST(uc);
+ if (qtest) {
+ error_setg(errp, "Only one instance of qtest can be created");
+ return;
+ }
+ if (!q->chr_name) {
+ error_setg(errp, "No backend specified");
+ return;
+ }
+
+ if (OBJECT(uc)->parent != qdev_get_machine()) {
+ q->has_machine_link = true;
+ object_property_add_const_link(qdev_get_machine(), "qtest", OBJECT(uc));
+ } else {
+ /* -qtest was used. */
+ }
+
+ qtest_server_start(q, errp);
+}
+
+static void qtest_unparent(Object *obj)
+{
+ QTest *q = QTEST(obj);
+
+ if (qtest == q) {
+ qemu_chr_fe_disconnect(&q->qtest_chr);
+ assert(!qtest_opened);
+ qemu_chr_fe_deinit(&q->qtest_chr, false);
+ if (qtest_log_fp) {
+ fclose(qtest_log_fp);
+ qtest_log_fp = NULL;
+ }
+ qtest = NULL;
+ }
+
+ if (q->has_machine_link) {
+ object_property_del(qdev_get_machine(), "qtest");
+ q->has_machine_link = false;
+ }
+}
+
+static void qtest_set_log(Object *obj, const char *value, Error **errp)
+{
+ QTest *q = QTEST(obj);
+
+ if (qtest == q) {
+ error_setg(errp, "Property 'log' can not be set now");
+ } else {
+ g_free(q->log);
+ q->log = g_strdup(value);
+ }
+}
+
+static char *qtest_get_log(Object *obj, Error **errp)
+{
+ QTest *q = QTEST(obj);
+
+ return g_strdup(q->log);
+}
+
+static void qtest_set_chardev(Object *obj, const char *value, Error **errp)
+{
+ QTest *q = QTEST(obj);
+ Chardev *chr;
+
+ if (qtest == q) {
+ error_setg(errp, "Property 'chardev' can not be set now");
+ return;
+ }
+
+ chr = qemu_chr_find(value);
+ if (!chr) {
+ error_setg(errp, "Cannot find character device '%s'", value);
+ return;
+ }
+
+ g_free(q->chr_name);
+ q->chr_name = g_strdup(value);
+
+ if (q->chr) {
+ object_unref(q->chr);
+ }
+ q->chr = chr;
+ object_ref(chr);
+}
+
+static char *qtest_get_chardev(Object *obj, Error **errp)
+{
+ QTest *q = QTEST(obj);
+
+ return g_strdup(q->chr_name);
+}
+
+static void qtest_class_init(ObjectClass *oc, void *data)
+{
+ UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
+
+ oc->unparent = qtest_unparent;
+ ucc->complete = qtest_complete;
+
+ object_class_property_add_str(oc, "chardev",
+ qtest_get_chardev, qtest_set_chardev);
+ object_class_property_add_str(oc, "log",
+ qtest_get_log, qtest_set_log);
+}
+
+static const TypeInfo qtest_info = {
+ .name = TYPE_QTEST,
+ .parent = TYPE_OBJECT,
+ .class_init = qtest_class_init,
+ .instance_size = sizeof(QTest),
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_USER_CREATABLE },
+ { }
+ }
+};
+
+static void register_types(void)
+{
+ type_register_static(&qtest_info);
+}
+
+type_init(register_types);
diff --git a/system/rtc.c b/system/rtc.c
new file mode 100644
index 0000000..4904581
--- /dev/null
+++ b/system/rtc.c
@@ -0,0 +1,192 @@
+/*
+ * RTC configuration and clock read
+ *
+ * Copyright (c) 2003-2020 QEMU contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
+#include "qemu/error-report.h"
+#include "qemu/option.h"
+#include "qemu/timer.h"
+#include "qom/object.h"
+#include "sysemu/replay.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/rtc.h"
+#include "hw/rtc/mc146818rtc.h"
+
+static enum {
+ RTC_BASE_UTC,
+ RTC_BASE_LOCALTIME,
+ RTC_BASE_DATETIME,
+} rtc_base_type = RTC_BASE_UTC;
+static time_t rtc_ref_start_datetime;
+static int rtc_realtime_clock_offset; /* used only with QEMU_CLOCK_REALTIME */
+static int rtc_host_datetime_offset = -1; /* valid & used only with
+ RTC_BASE_DATETIME */
+QEMUClockType rtc_clock;
+/***********************************************************/
+/* RTC reference time/date access */
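+/*
+ * Return the guest's reference time for @clock: for the REALTIME and
+ * VIRTUAL clocks this is the configured start datetime plus the elapsed
+ * clock time; for the HOST clock it is host time, shifted by the -rtc
+ * base offset when an explicit start datetime was given.
+ */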
+static time_t qemu_ref_timedate(QEMUClockType clock)
+{
+ time_t value = qemu_clock_get_ms(clock) / 1000;
+ switch (clock) {
+ case QEMU_CLOCK_REALTIME:
+ value -= rtc_realtime_clock_offset;
+ /* fall through */
+ case QEMU_CLOCK_VIRTUAL:
+ value += rtc_ref_start_datetime;
+ break;
+ case QEMU_CLOCK_HOST:
+ if (rtc_base_type == RTC_BASE_DATETIME) {
+ value -= rtc_host_datetime_offset;
+ }
+ break;
+ default:
+ assert(0);
+ }
+ return value;
+}
+
+void qemu_get_timedate(struct tm *tm, time_t offset)
+{
+ time_t ti = qemu_ref_timedate(rtc_clock);
+
+ ti += offset;
+
+ switch (rtc_base_type) {
+ case RTC_BASE_DATETIME:
+ case RTC_BASE_UTC:
+ gmtime_r(&ti, tm);
+ break;
+ case RTC_BASE_LOCALTIME:
+ localtime_r(&ti, tm);
+ break;
+ }
+}
+
+time_t qemu_timedate_diff(struct tm *tm)
+{
+ time_t seconds;
+
+ switch (rtc_base_type) {
+ case RTC_BASE_DATETIME:
+ case RTC_BASE_UTC:
+ seconds = mktimegm(tm);
+ break;
+ case RTC_BASE_LOCALTIME:
+ {
+ struct tm tmp = *tm;
+ tmp.tm_isdst = -1; /* use timezone to figure it out */
+ seconds = mktime(&tmp);
+ break;
+ }
+ default:
+ abort();
+ }
+
+ return seconds - qemu_ref_timedate(QEMU_CLOCK_HOST);
+}
+
+static void configure_rtc_base_datetime(const char *startdate)
+{
+ time_t rtc_start_datetime;
+ struct tm tm;
+
+ if (sscanf(startdate, "%d-%d-%dT%d:%d:%d", &tm.tm_year, &tm.tm_mon,
+ &tm.tm_mday, &tm.tm_hour, &tm.tm_min, &tm.tm_sec) == 6) {
+ /* OK */
+ } else if (sscanf(startdate, "%d-%d-%d",
+ &tm.tm_year, &tm.tm_mon, &tm.tm_mday) == 3) {
+ tm.tm_hour = 0;
+ tm.tm_min = 0;
+ tm.tm_sec = 0;
+ } else {
+ goto date_fail;
+ }
+ tm.tm_year -= 1900;
+ tm.tm_mon--;
+ rtc_start_datetime = mktimegm(&tm);
+ if (rtc_start_datetime == -1) {
+ date_fail:
+ error_report("invalid datetime format");
+ error_printf("valid formats: "
+ "'2006-06-17T16:01:21' or '2006-06-17'\n");
+ exit(1);
+ }
+ rtc_host_datetime_offset = rtc_ref_start_datetime - rtc_start_datetime;
+ rtc_ref_start_datetime = rtc_start_datetime;
+}
+
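+/*
+ * Parse the -rtc option, e.g. (illustrative):
+ *
+ * -rtc base=2006-06-17T16:01:21,clock=host,driftfix=slew
+ */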
+void configure_rtc(QemuOpts *opts)
+{
+ const char *value;
+
+ /* Set defaults */
+ rtc_clock = QEMU_CLOCK_HOST;
+ rtc_ref_start_datetime = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
+ rtc_realtime_clock_offset = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000;
+
+ value = qemu_opt_get(opts, "base");
+ if (value) {
+ if (!strcmp(value, "utc")) {
+ rtc_base_type = RTC_BASE_UTC;
+ } else if (!strcmp(value, "localtime")) {
+ rtc_base_type = RTC_BASE_LOCALTIME;
+ replay_add_blocker("-rtc base=localtime");
+ } else {
+ rtc_base_type = RTC_BASE_DATETIME;
+ configure_rtc_base_datetime(value);
+ }
+ }
+ value = qemu_opt_get(opts, "clock");
+ if (value) {
+ if (!strcmp(value, "host")) {
+ rtc_clock = QEMU_CLOCK_HOST;
+ } else if (!strcmp(value, "rt")) {
+ rtc_clock = QEMU_CLOCK_REALTIME;
+ } else if (!strcmp(value, "vm")) {
+ rtc_clock = QEMU_CLOCK_VIRTUAL;
+ } else {
+ error_report("invalid option value '%s'", value);
+ exit(1);
+ }
+ }
+ value = qemu_opt_get(opts, "driftfix");
+ if (value) {
+ if (!strcmp(value, "slew")) {
+ object_register_sugar_prop(TYPE_MC146818_RTC,
+ "lost_tick_policy",
+ "slew",
+ false);
+ if (!object_class_by_name(TYPE_MC146818_RTC)) {
+ warn_report("driftfix 'slew' is not available with this machine");
+ }
+ } else if (!strcmp(value, "none")) {
+ /* discard is default */
+ } else {
+ error_report("invalid option value '%s'", value);
+ exit(1);
+ }
+ }
+}
diff --git a/system/runstate-action.c b/system/runstate-action.c
new file mode 100644
index 0000000..ae0761a
--- /dev/null
+++ b/system/runstate-action.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "sysemu/runstate-action.h"
+#include "sysemu/watchdog.h"
+#include "qemu/config-file.h"
+#include "qapi/error.h"
+#include "qemu/option_int.h"
+
+RebootAction reboot_action = REBOOT_ACTION_RESET;
+ShutdownAction shutdown_action = SHUTDOWN_ACTION_POWEROFF;
+PanicAction panic_action = PANIC_ACTION_SHUTDOWN;
+
+/*
+ * Receives actions to be applied for specific guest events
+ * and sets the internal state as requested.
+ */
+void qmp_set_action(bool has_reboot, RebootAction reboot,
+ bool has_shutdown, ShutdownAction shutdown,
+ bool has_panic, PanicAction panic,
+ bool has_watchdog, WatchdogAction watchdog,
+ Error **errp)
+{
+ if (has_reboot) {
+ reboot_action = reboot;
+ }
+
+ if (has_panic) {
+ panic_action = panic;
+ }
+
+ if (has_watchdog) {
+ qmp_watchdog_set_action(watchdog, errp);
+ }
+
+ /* Process shutdown last, in case the panic action needs to be altered */
+ if (has_shutdown) {
+ shutdown_action = shutdown;
+ }
+}
diff --git a/system/runstate-hmp-cmds.c b/system/runstate-hmp-cmds.c
new file mode 100644
index 0000000..2df670f
--- /dev/null
+++ b/system/runstate-hmp-cmds.c
@@ -0,0 +1,95 @@
+/*
+ * HMP commands related to run state
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+#include "exec/cpu-common.h"
+#include "monitor/hmp.h"
+#include "monitor/monitor.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-run-state.h"
+#include "qapi/qmp/qdict.h"
+#include "qemu/accel.h"
+
+void hmp_info_status(Monitor *mon, const QDict *qdict)
+{
+ StatusInfo *info;
+
+ info = qmp_query_status(NULL);
+
+ monitor_printf(mon, "VM status: %s",
+ info->running ? "running" : "paused");
+
+ if (!info->running && info->status != RUN_STATE_PAUSED) {
+ monitor_printf(mon, " (%s)", RunState_str(info->status));
+ }
+
+ monitor_printf(mon, "\n");
+
+ qapi_free_StatusInfo(info);
+}
+
+void hmp_one_insn_per_tb(Monitor *mon, const QDict *qdict)
+{
+ const char *option = qdict_get_try_str(qdict, "option");
+ AccelState *accel = current_accel();
+ bool newval;
+
+ if (!object_property_find(OBJECT(accel), "one-insn-per-tb")) {
+ monitor_printf(mon,
+ "This accelerator does not support setting one-insn-per-tb\n");
+ return;
+ }
+
+ if (!option || !strcmp(option, "on")) {
+ newval = true;
+ } else if (!strcmp(option, "off")) {
+ newval = false;
+ } else {
+ monitor_printf(mon, "unexpected option %s\n", option);
+ return;
+ }
+ /* If the property exists then setting it can never fail */
+ object_property_set_bool(OBJECT(accel), "one-insn-per-tb",
+ newval, &error_abort);
+}
+
+void hmp_watchdog_action(Monitor *mon, const QDict *qdict)
+{
+ Error *err = NULL;
+ WatchdogAction action;
+ char *qapi_value;
+
+ qapi_value = g_ascii_strdown(qdict_get_str(qdict, "action"), -1);
+ action = qapi_enum_parse(&WatchdogAction_lookup, qapi_value, -1, &err);
+ g_free(qapi_value);
+ if (err) {
+ hmp_handle_error(mon, err);
+ return;
+ }
+ qmp_watchdog_set_action(action, &error_abort);
+}
+
+void watchdog_action_completion(ReadLineState *rs, int nb_args, const char *str)
+{
+ int i;
+
+ if (nb_args != 2) {
+ return;
+ }
+ readline_set_completion_index(rs, strlen(str));
+ for (i = 0; i < WATCHDOG_ACTION__MAX; i++) {
+ readline_add_completion_of(rs, str, WatchdogAction_str(i));
+ }
+}
diff --git a/system/runstate.c b/system/runstate.c
new file mode 100644
index 0000000..1652ed0
--- /dev/null
+++ b/system/runstate.c
@@ -0,0 +1,871 @@
+/*
+ * QEMU main system emulation loop
+ *
+ * Copyright (c) 2003-2020 QEMU contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "audio/audio.h"
+#include "block/block.h"
+#include "block/export.h"
+#include "chardev/char.h"
+#include "crypto/cipher.h"
+#include "crypto/init.h"
+#include "exec/cpu-common.h"
+#include "gdbstub/syscalls.h"
+#include "hw/boards.h"
+#include "migration/misc.h"
+#include "migration/postcopy-ram.h"
+#include "monitor/monitor.h"
+#include "net/net.h"
+#include "net/vhost_net.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-run-state.h"
+#include "qapi/qapi-events-run-state.h"
+#include "qemu/accel.h"
+#include "qemu/error-report.h"
+#include "qemu/job.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "qemu/plugin.h"
+#include "qemu/sockets.h"
+#include "qemu/timer.h"
+#include "qemu/thread.h"
+#include "qom/object.h"
+#include "qom/object_interfaces.h"
+#include "sysemu/cpus.h"
+#include "sysemu/qtest.h"
+#include "sysemu/replay.h"
+#include "sysemu/reset.h"
+#include "sysemu/runstate.h"
+#include "sysemu/runstate-action.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/tpm.h"
+#include "trace.h"
+
+static NotifierList exit_notifiers =
+ NOTIFIER_LIST_INITIALIZER(exit_notifiers);
+
+static RunState current_run_state = RUN_STATE_PRELAUNCH;
+
+/* We use RUN_STATE__MAX but any invalid value will do */
+static RunState vmstop_requested = RUN_STATE__MAX;
+static QemuMutex vmstop_lock;
+
+typedef struct {
+ RunState from;
+ RunState to;
+} RunStateTransition;
+
+static const RunStateTransition runstate_transitions_def[] = {
+ { RUN_STATE_PRELAUNCH, RUN_STATE_INMIGRATE },
+
+ { RUN_STATE_DEBUG, RUN_STATE_RUNNING },
+ { RUN_STATE_DEBUG, RUN_STATE_FINISH_MIGRATE },
+ { RUN_STATE_DEBUG, RUN_STATE_PRELAUNCH },
+
+ { RUN_STATE_INMIGRATE, RUN_STATE_INTERNAL_ERROR },
+ { RUN_STATE_INMIGRATE, RUN_STATE_IO_ERROR },
+ { RUN_STATE_INMIGRATE, RUN_STATE_PAUSED },
+ { RUN_STATE_INMIGRATE, RUN_STATE_RUNNING },
+ { RUN_STATE_INMIGRATE, RUN_STATE_SHUTDOWN },
+ { RUN_STATE_INMIGRATE, RUN_STATE_SUSPENDED },
+ { RUN_STATE_INMIGRATE, RUN_STATE_WATCHDOG },
+ { RUN_STATE_INMIGRATE, RUN_STATE_GUEST_PANICKED },
+ { RUN_STATE_INMIGRATE, RUN_STATE_FINISH_MIGRATE },
+ { RUN_STATE_INMIGRATE, RUN_STATE_PRELAUNCH },
+ { RUN_STATE_INMIGRATE, RUN_STATE_POSTMIGRATE },
+ { RUN_STATE_INMIGRATE, RUN_STATE_COLO },
+
+ { RUN_STATE_INTERNAL_ERROR, RUN_STATE_PAUSED },
+ { RUN_STATE_INTERNAL_ERROR, RUN_STATE_FINISH_MIGRATE },
+ { RUN_STATE_INTERNAL_ERROR, RUN_STATE_PRELAUNCH },
+
+ { RUN_STATE_IO_ERROR, RUN_STATE_RUNNING },
+ { RUN_STATE_IO_ERROR, RUN_STATE_FINISH_MIGRATE },
+ { RUN_STATE_IO_ERROR, RUN_STATE_PRELAUNCH },
+
+ { RUN_STATE_PAUSED, RUN_STATE_RUNNING },
+ { RUN_STATE_PAUSED, RUN_STATE_FINISH_MIGRATE },
+ { RUN_STATE_PAUSED, RUN_STATE_POSTMIGRATE },
+ { RUN_STATE_PAUSED, RUN_STATE_PRELAUNCH },
+ { RUN_STATE_PAUSED, RUN_STATE_COLO},
+
+ { RUN_STATE_POSTMIGRATE, RUN_STATE_RUNNING },
+ { RUN_STATE_POSTMIGRATE, RUN_STATE_FINISH_MIGRATE },
+ { RUN_STATE_POSTMIGRATE, RUN_STATE_PRELAUNCH },
+
+ { RUN_STATE_PRELAUNCH, RUN_STATE_RUNNING },
+ { RUN_STATE_PRELAUNCH, RUN_STATE_FINISH_MIGRATE },
+ { RUN_STATE_PRELAUNCH, RUN_STATE_INMIGRATE },
+
+ { RUN_STATE_FINISH_MIGRATE, RUN_STATE_RUNNING },
+ { RUN_STATE_FINISH_MIGRATE, RUN_STATE_PAUSED },
+ { RUN_STATE_FINISH_MIGRATE, RUN_STATE_POSTMIGRATE },
+ { RUN_STATE_FINISH_MIGRATE, RUN_STATE_PRELAUNCH },
+ { RUN_STATE_FINISH_MIGRATE, RUN_STATE_COLO },
+ { RUN_STATE_FINISH_MIGRATE, RUN_STATE_INTERNAL_ERROR },
+ { RUN_STATE_FINISH_MIGRATE, RUN_STATE_IO_ERROR },
+ { RUN_STATE_FINISH_MIGRATE, RUN_STATE_SHUTDOWN },
+ { RUN_STATE_FINISH_MIGRATE, RUN_STATE_SUSPENDED },
+ { RUN_STATE_FINISH_MIGRATE, RUN_STATE_WATCHDOG },
+ { RUN_STATE_FINISH_MIGRATE, RUN_STATE_GUEST_PANICKED },
+
+ { RUN_STATE_RESTORE_VM, RUN_STATE_RUNNING },
+ { RUN_STATE_RESTORE_VM, RUN_STATE_PRELAUNCH },
+
+ { RUN_STATE_COLO, RUN_STATE_RUNNING },
+ { RUN_STATE_COLO, RUN_STATE_PRELAUNCH },
+ { RUN_STATE_COLO, RUN_STATE_SHUTDOWN},
+
+ { RUN_STATE_RUNNING, RUN_STATE_DEBUG },
+ { RUN_STATE_RUNNING, RUN_STATE_INTERNAL_ERROR },
+ { RUN_STATE_RUNNING, RUN_STATE_IO_ERROR },
+ { RUN_STATE_RUNNING, RUN_STATE_PAUSED },
+ { RUN_STATE_RUNNING, RUN_STATE_FINISH_MIGRATE },
+ { RUN_STATE_RUNNING, RUN_STATE_RESTORE_VM },
+ { RUN_STATE_RUNNING, RUN_STATE_SAVE_VM },
+ { RUN_STATE_RUNNING, RUN_STATE_SHUTDOWN },
+ { RUN_STATE_RUNNING, RUN_STATE_WATCHDOG },
+ { RUN_STATE_RUNNING, RUN_STATE_GUEST_PANICKED },
+ { RUN_STATE_RUNNING, RUN_STATE_COLO},
+
+ { RUN_STATE_SAVE_VM, RUN_STATE_RUNNING },
+
+ { RUN_STATE_SHUTDOWN, RUN_STATE_PAUSED },
+ { RUN_STATE_SHUTDOWN, RUN_STATE_FINISH_MIGRATE },
+ { RUN_STATE_SHUTDOWN, RUN_STATE_PRELAUNCH },
+ { RUN_STATE_SHUTDOWN, RUN_STATE_COLO },
+
+ { RUN_STATE_DEBUG, RUN_STATE_SUSPENDED },
+ { RUN_STATE_RUNNING, RUN_STATE_SUSPENDED },
+ { RUN_STATE_SUSPENDED, RUN_STATE_RUNNING },
+ { RUN_STATE_SUSPENDED, RUN_STATE_FINISH_MIGRATE },
+ { RUN_STATE_SUSPENDED, RUN_STATE_PRELAUNCH },
+ { RUN_STATE_SUSPENDED, RUN_STATE_COLO},
+
+ { RUN_STATE_WATCHDOG, RUN_STATE_RUNNING },
+ { RUN_STATE_WATCHDOG, RUN_STATE_FINISH_MIGRATE },
+ { RUN_STATE_WATCHDOG, RUN_STATE_PRELAUNCH },
+ { RUN_STATE_WATCHDOG, RUN_STATE_COLO},
+
+ { RUN_STATE_GUEST_PANICKED, RUN_STATE_RUNNING },
+ { RUN_STATE_GUEST_PANICKED, RUN_STATE_FINISH_MIGRATE },
+ { RUN_STATE_GUEST_PANICKED, RUN_STATE_PRELAUNCH },
+
+ { RUN_STATE__MAX, RUN_STATE__MAX },
+};
+
+static bool runstate_valid_transitions[RUN_STATE__MAX][RUN_STATE__MAX];
+
+bool runstate_check(RunState state)
+{
+ return current_run_state == state;
+}
+
+static void runstate_init(void)
+{
+ const RunStateTransition *p;
+
+ memset(&runstate_valid_transitions, 0, sizeof(runstate_valid_transitions));
+ for (p = &runstate_transitions_def[0]; p->from != RUN_STATE__MAX; p++) {
+ runstate_valid_transitions[p->from][p->to] = true;
+ }
+
+ qemu_mutex_init(&vmstop_lock);
+}
+
+/* This function will abort() on invalid state transitions */
+void runstate_set(RunState new_state)
+{
+ assert(new_state < RUN_STATE__MAX);
+
+ trace_runstate_set(current_run_state, RunState_str(current_run_state),
+ new_state, RunState_str(new_state));
+
+ if (current_run_state == new_state) {
+ return;
+ }
+
+ if (!runstate_valid_transitions[current_run_state][new_state]) {
+ error_report("invalid runstate transition: '%s' -> '%s'",
+ RunState_str(current_run_state),
+ RunState_str(new_state));
+ abort();
+ }
+
+ current_run_state = new_state;
+}
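+
+/*
+ * Illustrative sketch (not part of the original file): a transition is
+ * accepted only if it is listed in runstate_transitions_def[] above.
+ * Assuming the usual initial RUN_STATE_PRELAUNCH state:
+ *
+ *     runstate_set(RUN_STATE_RUNNING);   // PRELAUNCH -> RUNNING: listed, OK
+ *     runstate_set(RUN_STATE_INMIGRATE); // RUNNING -> INMIGRATE: not listed,
+ *                                        // so runstate_set() abort()s
+ */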
+
+RunState runstate_get(void)
+{
+ return current_run_state;
+}
+
+bool runstate_is_running(void)
+{
+ return runstate_check(RUN_STATE_RUNNING);
+}
+
+bool runstate_needs_reset(void)
+{
+ return runstate_check(RUN_STATE_INTERNAL_ERROR) ||
+ runstate_check(RUN_STATE_SHUTDOWN);
+}
+
+StatusInfo *qmp_query_status(Error **errp)
+{
+ StatusInfo *info = g_malloc0(sizeof(*info));
+ AccelState *accel = current_accel();
+
+ /*
+ * We ignore errors, which will happen if the accelerator
+ * is not TCG. "singlestep" is meaningless for other accelerators,
+ * so we will set the StatusInfo field to false for those.
+ */
+ info->singlestep = object_property_get_bool(OBJECT(accel),
+ "one-insn-per-tb", NULL);
+ info->running = runstate_is_running();
+ info->status = current_run_state;
+
+ return info;
+}
+
+bool qemu_vmstop_requested(RunState *r)
+{
+ qemu_mutex_lock(&vmstop_lock);
+ *r = vmstop_requested;
+ vmstop_requested = RUN_STATE__MAX;
+ qemu_mutex_unlock(&vmstop_lock);
+ return *r < RUN_STATE__MAX;
+}
+
+void qemu_system_vmstop_request_prepare(void)
+{
+ qemu_mutex_lock(&vmstop_lock);
+}
+
+void qemu_system_vmstop_request(RunState state)
+{
+ vmstop_requested = state;
+ qemu_mutex_unlock(&vmstop_lock);
+ qemu_notify_event();
+}
+
+struct VMChangeStateEntry {
+ VMChangeStateHandler *cb;
+ VMChangeStateHandler *prepare_cb;
+ void *opaque;
+ QTAILQ_ENTRY(VMChangeStateEntry) entries;
+ int priority;
+};
+
+static QTAILQ_HEAD(, VMChangeStateEntry) vm_change_state_head =
+ QTAILQ_HEAD_INITIALIZER(vm_change_state_head);
+
+/**
+ * qemu_add_vm_change_state_handler_prio:
+ * @cb: the callback to invoke
+ * @opaque: user data passed to the callback
+ * @priority: low priorities execute first when the vm runs and the reverse is
+ * true when the vm stops
+ *
+ * Register a callback function that is invoked when the vm starts or stops
+ * running.
+ *
+ * Returns: an entry to be freed using qemu_del_vm_change_state_handler()
+ */
+VMChangeStateEntry *qemu_add_vm_change_state_handler_prio(
+ VMChangeStateHandler *cb, void *opaque, int priority)
+{
+ return qemu_add_vm_change_state_handler_prio_full(cb, NULL, opaque,
+ priority);
+}
+
+/**
+ * qemu_add_vm_change_state_handler_prio_full:
+ * @cb: the main callback to invoke
+ * @prepare_cb: a callback to invoke before the main callback
+ * @opaque: user data passed to the callbacks
+ * @priority: low priorities execute first when the vm runs and the reverse is
+ * true when the vm stops
+ *
+ * Register a main callback function and an optional prepare callback function
+ * that are invoked when the vm starts or stops running. The main callback and
+ * the prepare callback are called in two separate phases: First all prepare
+ * callbacks are called and only then all main callbacks are called. As its
+ * name suggests, the prepare callback can be used to do some preparatory work
+ * before invoking the main callback.
+ *
+ * Returns: an entry to be freed using qemu_del_vm_change_state_handler()
+ */
+VMChangeStateEntry *
+qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb,
+ VMChangeStateHandler *prepare_cb,
+ void *opaque, int priority)
+{
+ VMChangeStateEntry *e;
+ VMChangeStateEntry *other;
+
+ e = g_malloc0(sizeof(*e));
+ e->cb = cb;
+ e->prepare_cb = prepare_cb;
+ e->opaque = opaque;
+ e->priority = priority;
+
+ /* Keep list sorted in ascending priority order */
+ QTAILQ_FOREACH(other, &vm_change_state_head, entries) {
+ if (priority < other->priority) {
+ QTAILQ_INSERT_BEFORE(other, e, entries);
+ return e;
+ }
+ }
+
+ QTAILQ_INSERT_TAIL(&vm_change_state_head, e, entries);
+ return e;
+}
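+
+/*
+ * Usage sketch (illustrative; the handler names are hypothetical):
+ *
+ *     static void my_prepare(void *opaque, bool running, RunState state)
+ *     {
+ *         // runs in phase one, before any main callback
+ *     }
+ *
+ *     static void my_cb(void *opaque, bool running, RunState state)
+ *     {
+ *         // runs in phase two, once all prepare callbacks have finished
+ *     }
+ *
+ *     qemu_add_vm_change_state_handler_prio_full(my_cb, my_prepare, NULL, 0);
+ */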
+
+VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb,
+ void *opaque)
+{
+ return qemu_add_vm_change_state_handler_prio(cb, opaque, 0);
+}
+
+void qemu_del_vm_change_state_handler(VMChangeStateEntry *e)
+{
+ QTAILQ_REMOVE(&vm_change_state_head, e, entries);
+ g_free(e);
+}
+
+void vm_state_notify(bool running, RunState state)
+{
+ VMChangeStateEntry *e, *next;
+
+ trace_vm_state_notify(running, state, RunState_str(state));
+
+ if (running) {
+ QTAILQ_FOREACH_SAFE(e, &vm_change_state_head, entries, next) {
+ if (e->prepare_cb) {
+ e->prepare_cb(e->opaque, running, state);
+ }
+ }
+
+ QTAILQ_FOREACH_SAFE(e, &vm_change_state_head, entries, next) {
+ e->cb(e->opaque, running, state);
+ }
+ } else {
+ QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) {
+ if (e->prepare_cb) {
+ e->prepare_cb(e->opaque, running, state);
+ }
+ }
+
+ QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) {
+ e->cb(e->opaque, running, state);
+ }
+ }
+}
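+
+/*
+ * Ordering example (follows from the loops above): with handlers registered
+ * at priorities 0 and 10, a start notification (running == true) runs the
+ * prepare phase in order 0, 10 and then the main phase in order 0, 10; a
+ * stop notification reverses both phases to 10, 0.
+ */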
+
+static ShutdownCause reset_requested;
+static ShutdownCause shutdown_requested;
+static int shutdown_signal;
+static pid_t shutdown_pid;
+static int powerdown_requested;
+static int debug_requested;
+static int suspend_requested;
+static WakeupReason wakeup_reason;
+static NotifierList powerdown_notifiers =
+ NOTIFIER_LIST_INITIALIZER(powerdown_notifiers);
+static NotifierList suspend_notifiers =
+ NOTIFIER_LIST_INITIALIZER(suspend_notifiers);
+static NotifierList wakeup_notifiers =
+ NOTIFIER_LIST_INITIALIZER(wakeup_notifiers);
+static NotifierList shutdown_notifiers =
+ NOTIFIER_LIST_INITIALIZER(shutdown_notifiers);
+static uint32_t wakeup_reason_mask = ~(1 << QEMU_WAKEUP_REASON_NONE);
+
+ShutdownCause qemu_shutdown_requested_get(void)
+{
+ return shutdown_requested;
+}
+
+ShutdownCause qemu_reset_requested_get(void)
+{
+ return reset_requested;
+}
+
+static int qemu_shutdown_requested(void)
+{
+ return qatomic_xchg(&shutdown_requested, SHUTDOWN_CAUSE_NONE);
+}
+
+static void qemu_kill_report(void)
+{
+ if (!qtest_driver() && shutdown_signal) {
+ if (shutdown_pid == 0) {
+            /*
+             * This happens e.g. for ^C at the terminal, so it's worth
+             * avoiding printing an odd message in that case.
+             */
+ error_report("terminating on signal %d", shutdown_signal);
+ } else {
+ char *shutdown_cmd = qemu_get_pid_name(shutdown_pid);
+
+ error_report("terminating on signal %d from pid " FMT_pid " (%s)",
+ shutdown_signal, shutdown_pid,
+ shutdown_cmd ? shutdown_cmd : "<unknown process>");
+ g_free(shutdown_cmd);
+ }
+ shutdown_signal = 0;
+ }
+}
+
+static ShutdownCause qemu_reset_requested(void)
+{
+ ShutdownCause r = reset_requested;
+
+ if (r && replay_checkpoint(CHECKPOINT_RESET_REQUESTED)) {
+ reset_requested = SHUTDOWN_CAUSE_NONE;
+ return r;
+ }
+ return SHUTDOWN_CAUSE_NONE;
+}
+
+static int qemu_suspend_requested(void)
+{
+ int r = suspend_requested;
+ if (r && replay_checkpoint(CHECKPOINT_SUSPEND_REQUESTED)) {
+ suspend_requested = 0;
+ return r;
+ }
+ return false;
+}
+
+static WakeupReason qemu_wakeup_requested(void)
+{
+ return wakeup_reason;
+}
+
+static int qemu_powerdown_requested(void)
+{
+ int r = powerdown_requested;
+ powerdown_requested = 0;
+ return r;
+}
+
+static int qemu_debug_requested(void)
+{
+ int r = debug_requested;
+ debug_requested = 0;
+ return r;
+}
+
+/*
+ * Reset the VM. Issue an event unless @reason is SHUTDOWN_CAUSE_NONE.
+ */
+void qemu_system_reset(ShutdownCause reason)
+{
+ MachineClass *mc;
+
+ mc = current_machine ? MACHINE_GET_CLASS(current_machine) : NULL;
+
+ cpu_synchronize_all_states();
+
+ if (mc && mc->reset) {
+ mc->reset(current_machine, reason);
+ } else {
+ qemu_devices_reset(reason);
+ }
+ switch (reason) {
+ case SHUTDOWN_CAUSE_NONE:
+ case SHUTDOWN_CAUSE_SUBSYSTEM_RESET:
+ case SHUTDOWN_CAUSE_SNAPSHOT_LOAD:
+ break;
+ default:
+ qapi_event_send_reset(shutdown_caused_by_guest(reason), reason);
+ }
+ cpu_synchronize_all_post_reset();
+}
+
+/*
+ * Wake the VM after suspend.
+ */
+static void qemu_system_wakeup(void)
+{
+ MachineClass *mc;
+
+ mc = current_machine ? MACHINE_GET_CLASS(current_machine) : NULL;
+
+ if (mc && mc->wakeup) {
+ mc->wakeup(current_machine);
+ }
+}
+
+void qemu_system_guest_panicked(GuestPanicInformation *info)
+{
+ qemu_log_mask(LOG_GUEST_ERROR, "Guest crashed");
+
+ if (current_cpu) {
+ current_cpu->crash_occurred = true;
+ }
+ /*
+ * TODO: Currently the available panic actions are: none, pause, and
+ * shutdown, but in principle debug and reset could be supported as well.
+ * Investigate any potential use cases for the unimplemented actions.
+ */
+    if (panic_action == PANIC_ACTION_PAUSE ||
+        (panic_action == PANIC_ACTION_SHUTDOWN &&
+         shutdown_action == SHUTDOWN_ACTION_PAUSE)) {
+ qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_PAUSE, info);
+ vm_stop(RUN_STATE_GUEST_PANICKED);
+ } else if (panic_action == PANIC_ACTION_SHUTDOWN ||
+ panic_action == PANIC_ACTION_EXIT_FAILURE) {
+ qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_POWEROFF, info);
+ vm_stop(RUN_STATE_GUEST_PANICKED);
+ qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_PANIC);
+ } else {
+ qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_RUN, info);
+ }
+
+ if (info) {
+ if (info->type == GUEST_PANIC_INFORMATION_TYPE_HYPER_V) {
+ qemu_log_mask(LOG_GUEST_ERROR, "\nHV crash parameters: (%#"PRIx64
+ " %#"PRIx64" %#"PRIx64" %#"PRIx64" %#"PRIx64")\n",
+ info->u.hyper_v.arg1,
+ info->u.hyper_v.arg2,
+ info->u.hyper_v.arg3,
+ info->u.hyper_v.arg4,
+ info->u.hyper_v.arg5);
+ } else if (info->type == GUEST_PANIC_INFORMATION_TYPE_S390) {
+ qemu_log_mask(LOG_GUEST_ERROR, " on cpu %d: %s\n"
+ "PSW: 0x%016" PRIx64 " 0x%016" PRIx64"\n",
+ info->u.s390.core,
+ S390CrashReason_str(info->u.s390.reason),
+ info->u.s390.psw_mask,
+ info->u.s390.psw_addr);
+ }
+ qapi_free_GuestPanicInformation(info);
+ }
+}
+
+void qemu_system_guest_crashloaded(GuestPanicInformation *info)
+{
+ qemu_log_mask(LOG_GUEST_ERROR, "Guest crash loaded");
+ qapi_event_send_guest_crashloaded(GUEST_PANIC_ACTION_RUN, info);
+ qapi_free_GuestPanicInformation(info);
+}
+
+void qemu_system_reset_request(ShutdownCause reason)
+{
+ if (reboot_action == REBOOT_ACTION_SHUTDOWN &&
+ reason != SHUTDOWN_CAUSE_SUBSYSTEM_RESET) {
+ shutdown_requested = reason;
+ } else if (!cpus_are_resettable()) {
+ error_report("cpus are not resettable, terminating");
+ shutdown_requested = reason;
+ } else {
+ reset_requested = reason;
+ }
+ cpu_stop_current();
+ qemu_notify_event();
+}
+
+static void qemu_system_suspend(void)
+{
+ pause_all_vcpus();
+ notifier_list_notify(&suspend_notifiers, NULL);
+ runstate_set(RUN_STATE_SUSPENDED);
+ qapi_event_send_suspend();
+}
+
+void qemu_system_suspend_request(void)
+{
+ if (runstate_check(RUN_STATE_SUSPENDED)) {
+ return;
+ }
+ suspend_requested = 1;
+ cpu_stop_current();
+ qemu_notify_event();
+}
+
+void qemu_register_suspend_notifier(Notifier *notifier)
+{
+ notifier_list_add(&suspend_notifiers, notifier);
+}
+
+void qemu_system_wakeup_request(WakeupReason reason, Error **errp)
+{
+ trace_system_wakeup_request(reason);
+
+ if (!runstate_check(RUN_STATE_SUSPENDED)) {
+ error_setg(errp,
+ "Unable to wake up: guest is not in suspended state");
+ return;
+ }
+ if (!(wakeup_reason_mask & (1 << reason))) {
+ return;
+ }
+ runstate_set(RUN_STATE_RUNNING);
+ wakeup_reason = reason;
+ qemu_notify_event();
+}
+
+void qemu_system_wakeup_enable(WakeupReason reason, bool enabled)
+{
+ if (enabled) {
+ wakeup_reason_mask |= (1 << reason);
+ } else {
+ wakeup_reason_mask &= ~(1 << reason);
+ }
+}
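+
+/*
+ * Usage sketch (illustrative): a wakeup reason only has an effect while its
+ * bit is set in wakeup_reason_mask:
+ *
+ *     qemu_system_wakeup_enable(QEMU_WAKEUP_REASON_RTC, true);
+ *     // later, while in RUN_STATE_SUSPENDED:
+ *     qemu_system_wakeup_request(QEMU_WAKEUP_REASON_RTC, NULL);
+ */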
+
+void qemu_register_wakeup_notifier(Notifier *notifier)
+{
+ notifier_list_add(&wakeup_notifiers, notifier);
+}
+
+static bool wakeup_suspend_enabled;
+
+void qemu_register_wakeup_support(void)
+{
+ wakeup_suspend_enabled = true;
+}
+
+bool qemu_wakeup_suspend_enabled(void)
+{
+ return wakeup_suspend_enabled;
+}
+
+void qemu_system_killed(int signal, pid_t pid)
+{
+ shutdown_signal = signal;
+ shutdown_pid = pid;
+ shutdown_action = SHUTDOWN_ACTION_POWEROFF;
+
+ /* Cannot call qemu_system_shutdown_request directly because
+ * we are in a signal handler.
+ */
+ shutdown_requested = SHUTDOWN_CAUSE_HOST_SIGNAL;
+ qemu_notify_event();
+}
+
+void qemu_system_shutdown_request(ShutdownCause reason)
+{
+ trace_qemu_system_shutdown_request(reason);
+ replay_shutdown_request(reason);
+ shutdown_requested = reason;
+ qemu_notify_event();
+}
+
+static void qemu_system_powerdown(void)
+{
+ qapi_event_send_powerdown();
+ notifier_list_notify(&powerdown_notifiers, NULL);
+}
+
+static void qemu_system_shutdown(ShutdownCause cause)
+{
+ qapi_event_send_shutdown(shutdown_caused_by_guest(cause), cause);
+ notifier_list_notify(&shutdown_notifiers, &cause);
+}
+
+void qemu_system_powerdown_request(void)
+{
+ trace_qemu_system_powerdown_request();
+ powerdown_requested = 1;
+ qemu_notify_event();
+}
+
+void qemu_register_powerdown_notifier(Notifier *notifier)
+{
+ notifier_list_add(&powerdown_notifiers, notifier);
+}
+
+void qemu_register_shutdown_notifier(Notifier *notifier)
+{
+ notifier_list_add(&shutdown_notifiers, notifier);
+}
+
+void qemu_system_debug_request(void)
+{
+ debug_requested = 1;
+ qemu_notify_event();
+}
+
+static bool main_loop_should_exit(int *status)
+{
+ RunState r;
+ ShutdownCause request;
+
+ if (qemu_debug_requested()) {
+ vm_stop(RUN_STATE_DEBUG);
+ }
+ if (qemu_suspend_requested()) {
+ qemu_system_suspend();
+ }
+ request = qemu_shutdown_requested();
+ if (request) {
+ qemu_kill_report();
+ qemu_system_shutdown(request);
+ if (shutdown_action == SHUTDOWN_ACTION_PAUSE) {
+ vm_stop(RUN_STATE_SHUTDOWN);
+ } else {
+ if (request == SHUTDOWN_CAUSE_GUEST_PANIC &&
+ panic_action == PANIC_ACTION_EXIT_FAILURE) {
+ *status = EXIT_FAILURE;
+ }
+ return true;
+ }
+ }
+ request = qemu_reset_requested();
+ if (request) {
+ pause_all_vcpus();
+ qemu_system_reset(request);
+ resume_all_vcpus();
+ /*
+ * runstate can change in pause_all_vcpus()
+ * as iothread mutex is unlocked
+ */
+ if (!runstate_check(RUN_STATE_RUNNING) &&
+ !runstate_check(RUN_STATE_INMIGRATE) &&
+ !runstate_check(RUN_STATE_FINISH_MIGRATE)) {
+ runstate_set(RUN_STATE_PRELAUNCH);
+ }
+ }
+ if (qemu_wakeup_requested()) {
+ pause_all_vcpus();
+ qemu_system_wakeup();
+ notifier_list_notify(&wakeup_notifiers, &wakeup_reason);
+ wakeup_reason = QEMU_WAKEUP_REASON_NONE;
+ resume_all_vcpus();
+ qapi_event_send_wakeup();
+ }
+ if (qemu_powerdown_requested()) {
+ qemu_system_powerdown();
+ }
+ if (qemu_vmstop_requested(&r)) {
+ vm_stop(r);
+ }
+ return false;
+}
+
+int qemu_main_loop(void)
+{
+ int status = EXIT_SUCCESS;
+
+ while (!main_loop_should_exit(&status)) {
+ main_loop_wait(false);
+ }
+
+ return status;
+}
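+
+/*
+ * Flow sketch (illustrative): a QMP 'quit' ends the loop roughly as follows:
+ *
+ *     qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_QMP_QUIT);
+ *     // main_loop_wait() wakes up; main_loop_should_exit() consumes the
+ *     // request, notifies the shutdown notifiers and returns true, so
+ *     // qemu_main_loop() returns EXIT_SUCCESS to its caller.
+ */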
+
+void qemu_add_exit_notifier(Notifier *notify)
+{
+ notifier_list_add(&exit_notifiers, notify);
+}
+
+void qemu_remove_exit_notifier(Notifier *notify)
+{
+ notifier_remove(notify);
+}
+
+static void qemu_run_exit_notifiers(void)
+{
+ notifier_list_notify(&exit_notifiers, NULL);
+}
+
+void qemu_init_subsystems(void)
+{
+ Error *err = NULL;
+
+ os_set_line_buffering();
+
+ module_call_init(MODULE_INIT_TRACE);
+
+ qemu_init_cpu_list();
+ qemu_init_cpu_loop();
+ qemu_mutex_lock_iothread();
+
+ atexit(qemu_run_exit_notifiers);
+
+ module_call_init(MODULE_INIT_QOM);
+ module_call_init(MODULE_INIT_MIGRATION);
+
+ runstate_init();
+ precopy_infrastructure_init();
+ postcopy_infrastructure_init();
+ monitor_init_globals();
+
+ if (qcrypto_init(&err) < 0) {
+ error_reportf_err(err, "cannot initialize crypto: ");
+ exit(1);
+ }
+
+ os_setup_early_signal_handling();
+
+ bdrv_init_with_whitelist();
+ socket_init();
+}
+
+void qemu_cleanup(void)
+{
+ gdb_exit(0);
+
+ /*
+     * Cleaning up the migration object cancels any existing migration.
+     * Try to do this early so that it also stops using devices.
+ */
+ migration_shutdown();
+
+ /*
+ * Close the exports before draining the block layer. The export
+ * drivers may have coroutines yielding on it, so we need to clean
+     * them up before the drain, as otherwise they may get stuck in
+ * blk_wait_while_drained().
+ */
+ blk_exp_close_all();
+
+ /* No more vcpu or device emulation activity beyond this point */
+ vm_shutdown();
+ replay_finish();
+
+ /*
+ * We must cancel all block jobs while the block layer is drained,
+ * or cancelling will be affected by throttling and thus may block
+ * for an extended period of time.
+ * Begin the drained section after vm_shutdown() to avoid requests being
+ * stuck in the BlockBackend's request queue.
+ * We do not need to end this section, because we do not want any
+ * requests happening from here on anyway.
+ */
+ bdrv_drain_all_begin();
+ job_cancel_sync_all();
+ bdrv_close_all();
+
+ /* vhost-user must be cleaned up before chardevs. */
+ tpm_cleanup();
+ net_cleanup();
+ audio_cleanup();
+ monitor_cleanup();
+ qemu_chr_cleanup();
+ user_creatable_cleanup();
+ /* TODO: unref root container, check all devices are ok */
+}
diff --git a/system/tpm-hmp-cmds.c b/system/tpm-hmp-cmds.c
new file mode 100644
index 0000000..9ed6ad6
--- /dev/null
+++ b/system/tpm-hmp-cmds.c
@@ -0,0 +1,65 @@
+/*
+ * HMP commands related to TPM
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/qapi-commands-tpm.h"
+#include "monitor/monitor.h"
+#include "monitor/hmp.h"
+#include "qapi/error.h"
+
+void hmp_info_tpm(Monitor *mon, const QDict *qdict)
+{
+#ifdef CONFIG_TPM
+ TPMInfoList *info_list, *info;
+ Error *err = NULL;
+ unsigned int c = 0;
+ TPMPassthroughOptions *tpo;
+ TPMEmulatorOptions *teo;
+
+ info_list = qmp_query_tpm(&err);
+ if (err) {
+ monitor_printf(mon, "TPM device not supported\n");
+ error_free(err);
+ return;
+ }
+
+ if (info_list) {
+ monitor_printf(mon, "TPM device:\n");
+ }
+
+ for (info = info_list; info; info = info->next) {
+ TPMInfo *ti = info->value;
+ monitor_printf(mon, " tpm%d: model=%s\n",
+ c, TpmModel_str(ti->model));
+
+ monitor_printf(mon, " \\ %s: type=%s",
+ ti->id, TpmType_str(ti->options->type));
+
+ switch (ti->options->type) {
+ case TPM_TYPE_PASSTHROUGH:
+ tpo = ti->options->u.passthrough.data;
+ monitor_printf(mon, "%s%s%s%s",
+ tpo->path ? ",path=" : "",
+ tpo->path ?: "",
+ tpo->cancel_path ? ",cancel-path=" : "",
+ tpo->cancel_path ?: "");
+ break;
+ case TPM_TYPE_EMULATOR:
+ teo = ti->options->u.emulator.data;
+ monitor_printf(mon, ",chardev=%s", teo->chardev);
+ break;
+ case TPM_TYPE__MAX:
+ break;
+ }
+ monitor_printf(mon, "\n");
+ c++;
+ }
+ qapi_free_TPMInfoList(info_list);
+#else
+ monitor_printf(mon, "TPM device not supported\n");
+#endif /* CONFIG_TPM */
+}
diff --git a/system/tpm.c b/system/tpm.c
new file mode 100644
index 0000000..578563f
--- /dev/null
+++ b/system/tpm.c
@@ -0,0 +1,239 @@
+/*
+ * TPM configuration
+ *
+ * Copyright (C) 2011-2013 IBM Corporation
+ *
+ * Authors:
+ * Stefan Berger <stefanb@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ * Based on net.c
+ */
+
+#include "qemu/osdep.h"
+
+#include "qapi/error.h"
+#include "qapi/qapi-commands-tpm.h"
+#include "qapi/qmp/qerror.h"
+#include "sysemu/tpm_backend.h"
+#include "sysemu/tpm.h"
+#include "qemu/config-file.h"
+#include "qemu/error-report.h"
+
+static QLIST_HEAD(, TPMBackend) tpm_backends =
+ QLIST_HEAD_INITIALIZER(tpm_backends);
+
+static const TPMBackendClass *
+tpm_be_find_by_type(enum TpmType type)
+{
+ ObjectClass *oc;
+ char *typename = g_strdup_printf("tpm-%s", TpmType_str(type));
+
+ oc = object_class_by_name(typename);
+ g_free(typename);
+
+ if (!object_class_dynamic_cast(oc, TYPE_TPM_BACKEND)) {
+ return NULL;
+ }
+
+ return TPM_BACKEND_CLASS(oc);
+}
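+
+/*
+ * Example (illustrative): TPM_TYPE_EMULATOR stringifies to "emulator", so
+ * the lookup above resolves the QOM class named "tpm-emulator"; a type with
+ * no registered backend class yields NULL.
+ */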
+
+/*
+ * Walk the list of available TPM backend drivers and display them on the
+ * screen.
+ */
+static void tpm_display_backend_drivers(void)
+{
+ bool got_one = false;
+ int i;
+
+ for (i = 0; i < TPM_TYPE__MAX; i++) {
+ const TPMBackendClass *bc = tpm_be_find_by_type(i);
+ if (!bc) {
+ continue;
+ }
+ if (!got_one) {
+ error_printf("Supported TPM types (choose only one):\n");
+ got_one = true;
+ }
+ error_printf("%12s %s\n", TpmType_str(i), bc->desc);
+ }
+ if (!got_one) {
+ error_printf("No TPM backend types are available\n");
+ }
+}
+
+/*
+ * Find the TPM with the given Id
+ */
+TPMBackend *qemu_find_tpm_be(const char *id)
+{
+ TPMBackend *drv;
+
+ if (id) {
+ QLIST_FOREACH(drv, &tpm_backends, list) {
+ if (!strcmp(drv->id, id)) {
+ return drv;
+ }
+ }
+ }
+
+ return NULL;
+}
+
+static int tpm_init_tpmdev(void *dummy, QemuOpts *opts, Error **errp)
+{
+ /*
+ * Use of error_report() in a function with an Error ** parameter
+ * is suspicious. It is okay here. The parameter only exists to
+ * make the function usable with qemu_opts_foreach(). It is not
+ * actually used.
+ */
+ const char *value;
+ const char *id;
+ const TPMBackendClass *be;
+ TPMBackend *drv;
+ Error *local_err = NULL;
+ int i;
+
+ if (!QLIST_EMPTY(&tpm_backends)) {
+ error_report("Only one TPM is allowed.");
+ return 1;
+ }
+
+ id = qemu_opts_id(opts);
+ if (id == NULL) {
+ error_report(QERR_MISSING_PARAMETER, "id");
+ return 1;
+ }
+
+ value = qemu_opt_get(opts, "type");
+ if (!value) {
+ error_report(QERR_MISSING_PARAMETER, "type");
+ tpm_display_backend_drivers();
+ return 1;
+ }
+
+ i = qapi_enum_parse(&TpmType_lookup, value, -1, NULL);
+ be = i >= 0 ? tpm_be_find_by_type(i) : NULL;
+ if (be == NULL) {
+ error_report(QERR_INVALID_PARAMETER_VALUE,
+ "type", "a TPM backend type");
+ tpm_display_backend_drivers();
+ return 1;
+ }
+
+ /* validate backend specific opts */
+ if (!qemu_opts_validate(opts, be->opts, &local_err)) {
+ error_report_err(local_err);
+ return 1;
+ }
+
+ drv = be->create(opts);
+ if (!drv) {
+ return 1;
+ }
+
+ drv->id = g_strdup(id);
+ QLIST_INSERT_HEAD(&tpm_backends, drv, list);
+
+ return 0;
+}
+
+/*
+ * Walk the list of TPM backend drivers that are in use and call their
+ * destroy function to have them cleaned up.
+ */
+void tpm_cleanup(void)
+{
+ TPMBackend *drv, *next;
+
+ QLIST_FOREACH_SAFE(drv, &tpm_backends, list, next) {
+ QLIST_REMOVE(drv, list);
+ object_unref(OBJECT(drv));
+ }
+}
+
+/*
+ * Initialize the TPM. Process the tpmdev command line options describing the
+ * TPM backend.
+ */
+int tpm_init(void)
+{
+ if (qemu_opts_foreach(qemu_find_opts("tpmdev"),
+ tpm_init_tpmdev, NULL, NULL)) {
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Parse the TPM configuration options.
+ * To display all available TPM backends the user may use '-tpmdev help'
+ */
+int tpm_config_parse(QemuOptsList *opts_list, const char *optarg)
+{
+ QemuOpts *opts;
+
+ if (!strcmp(optarg, "help")) {
+ tpm_display_backend_drivers();
+ return -1;
+ }
+ opts = qemu_opts_parse_noisily(opts_list, optarg, true);
+ if (!opts) {
+ return -1;
+ }
+ return 0;
+}
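+
+/*
+ * Command-line sketch (illustrative): list the supported backends with
+ * '-tpmdev help', or configure the emulator backend, e.g.:
+ *
+ *     qemu-system-x86_64 ... \
+ *         -chardev socket,id=chrtpm,path=/tmp/swtpm-sock \
+ *         -tpmdev emulator,id=tpm0,chardev=chrtpm
+ */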
+
+/*
+ * Walk the list of active TPM backends and collect information about them.
+ */
+TPMInfoList *qmp_query_tpm(Error **errp)
+{
+ TPMBackend *drv;
+ TPMInfoList *head = NULL, **tail = &head;
+
+ QLIST_FOREACH(drv, &tpm_backends, list) {
+ if (!drv->tpmif) {
+ continue;
+ }
+
+ QAPI_LIST_APPEND(tail, tpm_backend_query_tpm(drv));
+ }
+
+ return head;
+}
+
+TpmTypeList *qmp_query_tpm_types(Error **errp)
+{
+ unsigned int i = 0;
+ TpmTypeList *head = NULL, **tail = &head;
+
+ for (i = 0; i < TPM_TYPE__MAX; i++) {
+ if (!tpm_be_find_by_type(i)) {
+ continue;
+ }
+ QAPI_LIST_APPEND(tail, i);
+ }
+
+ return head;
+}
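+
+/*
+ * QMP sketch (illustrative): { "execute": "query-tpm-types" } returns the
+ * backend types available in this binary, e.g.
+ * { "return": [ "passthrough", "emulator" ] }.
+ */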
+
+TpmModelList *qmp_query_tpm_models(Error **errp)
+{
+ TpmModelList *head = NULL, **tail = &head;
+ GSList *e, *l = object_class_get_list(TYPE_TPM_IF, false);
+
+ for (e = l; e; e = e->next) {
+ TPMIfClass *c = TPM_IF_CLASS(e->data);
+
+ QAPI_LIST_APPEND(tail, c->model);
+ }
+ g_slist_free(l);
+
+ return head;
+}
diff --git a/system/trace-events b/system/trace-events
new file mode 100644
index 0000000..69c9044
--- /dev/null
+++ b/system/trace-events
@@ -0,0 +1,40 @@
+# See docs/devel/tracing.rst for syntax documentation.
+
+# balloon.c
+# Since requests are raised via monitor, not many tracepoints are needed.
+balloon_event(void *opaque, unsigned long addr) "opaque %p addr %lu"
+
+# ioport.c
+cpu_in(unsigned int addr, char size, unsigned int val) "addr 0x%x(%c) value %u"
+cpu_out(unsigned int addr, char size, unsigned int val) "addr 0x%x(%c) value %u"
+
+# memory.c
+memory_region_ops_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size, const char *name) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u name '%s'"
+memory_region_ops_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size, const char *name) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u name '%s'"
+memory_region_subpage_read(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset 0x%"PRIx64" value 0x%"PRIx64" size %u"
+memory_region_subpage_write(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset 0x%"PRIx64" value 0x%"PRIx64" size %u"
+memory_region_ram_device_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u"
+memory_region_ram_device_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u"
+memory_region_sync_dirty(const char *mr, const char *listener, int global) "mr '%s' listener '%s' synced (global=%d)"
+flatview_new(void *view, void *root) "%p (root %p)"
+flatview_destroy(void *view, void *root) "%p (root %p)"
+flatview_destroy_rcu(void *view, void *root) "%p (root %p)"
+global_dirty_changed(unsigned int bitmask) "bitmask 0x%"PRIx32
+
+# cpus.c
+vm_stop_flush_all(int ret) "ret %d"
+
+# vl.c
+vm_state_notify(int running, int reason, const char *reason_str) "running %d reason %d (%s)"
+load_file(const char *name, const char *path) "name %s location %s"
+runstate_set(int current_state, const char *current_state_str, int new_state, const char *new_state_str) "current_run_state %d (%s) new_state %d (%s)"
+system_wakeup_request(int reason) "reason=%d"
+qemu_system_shutdown_request(int reason) "reason=%d"
+qemu_system_powerdown_request(void) ""
+
+# dirtylimit.c
+dirtylimit_state_initialize(int max_cpus) "dirtylimit state initialize: max cpus %d"
+dirtylimit_state_finalize(void) ""
+dirtylimit_throttle_pct(int cpu_index, uint64_t pct, int64_t time_us) "CPU[%d] throttle percent: %" PRIu64 ", throttle adjust time %"PRIi64 " us"
+dirtylimit_set_vcpu(int cpu_index, uint64_t quota) "CPU[%d] set dirty page rate limit %"PRIu64
+dirtylimit_vcpu_execute(int cpu_index, int64_t sleep_time_us) "CPU[%d] sleep %"PRIi64 " us"
diff --git a/system/trace.h b/system/trace.h
new file mode 100644
index 0000000..cd0136d
--- /dev/null
+++ b/system/trace.h
@@ -0,0 +1 @@
+#include "trace/trace-system.h"
diff --git a/system/vl.c b/system/vl.c
new file mode 100644
index 0000000..98e071e
--- /dev/null
+++ b/system/vl.c
@@ -0,0 +1,3730 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/help-texts.h"
+#include "qemu/datadir.h"
+#include "qemu/units.h"
+#include "exec/cpu-common.h"
+#include "exec/page-vary.h"
+#include "hw/qdev-properties.h"
+#include "qapi/compat-policy.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
+#include "qapi/qmp/qjson.h"
+#include "qemu-version.h"
+#include "qemu/cutils.h"
+#include "qemu/help_option.h"
+#include "qemu/hw-version.h"
+#include "qemu/uuid.h"
+#include "sysemu/reset.h"
+#include "sysemu/runstate.h"
+#include "sysemu/runstate-action.h"
+#include "sysemu/seccomp.h"
+#include "sysemu/tcg.h"
+#include "sysemu/xen.h"
+
+#include "qemu/error-report.h"
+#include "qemu/sockets.h"
+#include "qemu/accel.h"
+#include "qemu/async-teardown.h"
+#include "hw/usb.h"
+#include "hw/isa/isa.h"
+#include "hw/scsi/scsi.h"
+#include "hw/display/vga.h"
+#include "hw/firmware/smbios.h"
+#include "hw/acpi/acpi.h"
+#include "hw/xen/xen.h"
+#include "hw/loader.h"
+#include "monitor/qdev.h"
+#include "net/net.h"
+#include "net/slirp.h"
+#include "monitor/monitor.h"
+#include "ui/console.h"
+#include "ui/input.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/numa.h"
+#include "sysemu/hostmem.h"
+#include "exec/gdbstub.h"
+#include "qemu/timer.h"
+#include "chardev/char.h"
+#include "qemu/bitmap.h"
+#include "qemu/log.h"
+#include "sysemu/blockdev.h"
+#include "hw/block/block.h"
+#include "hw/i386/x86.h"
+#include "hw/i386/pc.h"
+#include "migration/misc.h"
+#include "migration/snapshot.h"
+#include "sysemu/tpm.h"
+#include "sysemu/dma.h"
+#include "hw/audio/soundhw.h"
+#include "audio/audio.h"
+#include "sysemu/cpus.h"
+#include "sysemu/cpu-timers.h"
+#include "migration/colo.h"
+#include "migration/postcopy-ram.h"
+#include "sysemu/kvm.h"
+#include "qapi/qobject-input-visitor.h"
+#include "qemu/option.h"
+#include "qemu/config-file.h"
+#include "qemu/main-loop.h"
+#ifdef CONFIG_VIRTFS
+#include "fsdev/qemu-fsdev.h"
+#endif
+#include "sysemu/qtest.h"
+#ifdef CONFIG_TCG
+#include "accel/tcg/perf.h"
+#endif
+
+#include "disas/disas.h"
+
+#include "trace.h"
+#include "trace/control.h"
+#include "qemu/plugin.h"
+#include "qemu/queue.h"
+#include "sysemu/arch_init.h"
+#include "exec/confidential-guest-support.h"
+
+#include "ui/qemu-spice.h"
+#include "qapi/string-input-visitor.h"
+#include "qapi/opts-visitor.h"
+#include "qapi/clone-visitor.h"
+#include "qom/object_interfaces.h"
+#include "semihosting/semihost.h"
+#include "crypto/init.h"
+#include "sysemu/replay.h"
+#include "qapi/qapi-events-run-state.h"
+#include "qapi/qapi-types-audio.h"
+#include "qapi/qapi-visit-audio.h"
+#include "qapi/qapi-visit-block-core.h"
+#include "qapi/qapi-visit-compat.h"
+#include "qapi/qapi-visit-machine.h"
+#include "qapi/qapi-visit-ui.h"
+#include "qapi/qapi-commands-block-core.h"
+#include "qapi/qapi-commands-migration.h"
+#include "qapi/qapi-commands-misc.h"
+#include "qapi/qapi-visit-qom.h"
+#include "qapi/qapi-commands-ui.h"
+#include "block/qdict.h"
+#include "qapi/qmp/qerror.h"
+#include "sysemu/iothread.h"
+#include "qemu/guest-random.h"
+#include "qemu/keyval.h"
+
+#define MAX_VIRTIO_CONSOLES 1
+
+typedef struct BlockdevOptionsQueueEntry {
+ BlockdevOptions *bdo;
+ Location loc;
+ QSIMPLEQ_ENTRY(BlockdevOptionsQueueEntry) entry;
+} BlockdevOptionsQueueEntry;
+
+typedef QSIMPLEQ_HEAD(, BlockdevOptionsQueueEntry) BlockdevOptionsQueue;
+
+typedef struct ObjectOption {
+ ObjectOptions *opts;
+ QTAILQ_ENTRY(ObjectOption) next;
+} ObjectOption;
+
+typedef struct DeviceOption {
+ QDict *opts;
+ Location loc;
+ QTAILQ_ENTRY(DeviceOption) next;
+} DeviceOption;
+
+static const char *cpu_option;
+static const char *mem_path;
+static const char *incoming;
+static const char *loadvm;
+static const char *accelerators;
+static bool have_custom_ram_size;
+static const char *ram_memdev_id;
+static QDict *machine_opts_dict;
+static QTAILQ_HEAD(, ObjectOption) object_opts = QTAILQ_HEAD_INITIALIZER(object_opts);
+static QTAILQ_HEAD(, DeviceOption) device_opts = QTAILQ_HEAD_INITIALIZER(device_opts);
+static int display_remote;
+static int snapshot;
+static bool preconfig_requested;
+static QemuPluginList plugin_list = QTAILQ_HEAD_INITIALIZER(plugin_list);
+static BlockdevOptionsQueue bdo_queue = QSIMPLEQ_HEAD_INITIALIZER(bdo_queue);
+static bool nographic = false;
+static int mem_prealloc; /* force preallocation of physical target memory */
+static const char *vga_model = NULL;
+static DisplayOptions dpy;
+static int num_serial_hds;
+static Chardev **serial_hds;
+static const char *log_mask;
+static const char *log_file;
+static bool list_data_dirs;
+static const char *qtest_chrdev;
+static const char *qtest_log;
+static bool opt_one_insn_per_tb;
+
+static int has_defaults = 1;
+static int default_serial = 1;
+static int default_parallel = 1;
+static int default_monitor = 1;
+static int default_floppy = 1;
+static int default_cdrom = 1;
+static int default_sdcard = 1;
+static int default_vga = 1;
+static int default_net = 1;
+
+static struct {
+ const char *driver;
+ int *flag;
+} default_list[] = {
+ { .driver = "isa-serial", .flag = &default_serial },
+ { .driver = "isa-parallel", .flag = &default_parallel },
+ { .driver = "isa-fdc", .flag = &default_floppy },
+ { .driver = "floppy", .flag = &default_floppy },
+ { .driver = "ide-cd", .flag = &default_cdrom },
+ { .driver = "ide-hd", .flag = &default_cdrom },
+ { .driver = "scsi-cd", .flag = &default_cdrom },
+ { .driver = "scsi-hd", .flag = &default_cdrom },
+ { .driver = "VGA", .flag = &default_vga },
+ { .driver = "isa-vga", .flag = &default_vga },
+ { .driver = "cirrus-vga", .flag = &default_vga },
+ { .driver = "isa-cirrus-vga", .flag = &default_vga },
+ { .driver = "vmware-svga", .flag = &default_vga },
+ { .driver = "qxl-vga", .flag = &default_vga },
+ { .driver = "virtio-vga", .flag = &default_vga },
+ { .driver = "ati-vga", .flag = &default_vga },
+ { .driver = "vhost-user-vga", .flag = &default_vga },
+ { .driver = "virtio-vga-gl", .flag = &default_vga },
+};
+
+static QemuOptsList qemu_rtc_opts = {
+ .name = "rtc",
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_rtc_opts.head),
+ .merge_lists = true,
+ .desc = {
+ {
+ .name = "base",
+ .type = QEMU_OPT_STRING,
+ },{
+ .name = "clock",
+ .type = QEMU_OPT_STRING,
+ },{
+ .name = "driftfix",
+ .type = QEMU_OPT_STRING,
+ },
+ { /* end of list */ }
+ },
+};
+
+static QemuOptsList qemu_option_rom_opts = {
+ .name = "option-rom",
+ .implied_opt_name = "romfile",
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_option_rom_opts.head),
+ .desc = {
+ {
+ .name = "bootindex",
+ .type = QEMU_OPT_NUMBER,
+ }, {
+ .name = "romfile",
+ .type = QEMU_OPT_STRING,
+ },
+ { /* end of list */ }
+ },
+};
+
+static QemuOptsList qemu_accel_opts = {
+ .name = "accel",
+ .implied_opt_name = "accel",
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_accel_opts.head),
+ .desc = {
+ /*
+ * no elements => accept any
+ * sanity checking will happen later
+ * when setting accelerator properties
+ */
+ { }
+ },
+};
+
+static QemuOptsList qemu_boot_opts = {
+ .name = "boot-opts",
+ .implied_opt_name = "order",
+ .merge_lists = true,
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_boot_opts.head),
+ .desc = {
+ {
+ .name = "order",
+ .type = QEMU_OPT_STRING,
+ }, {
+ .name = "once",
+ .type = QEMU_OPT_STRING,
+ }, {
+ .name = "menu",
+ .type = QEMU_OPT_BOOL,
+ }, {
+ .name = "splash",
+ .type = QEMU_OPT_STRING,
+ }, {
+ .name = "splash-time",
+ .type = QEMU_OPT_NUMBER,
+ }, {
+ .name = "reboot-timeout",
+ .type = QEMU_OPT_NUMBER,
+ }, {
+ .name = "strict",
+ .type = QEMU_OPT_BOOL,
+ },
+ { /*End of list */ }
+ },
+};
+
+static QemuOptsList qemu_add_fd_opts = {
+ .name = "add-fd",
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_add_fd_opts.head),
+ .desc = {
+ {
+ .name = "fd",
+ .type = QEMU_OPT_NUMBER,
+ .help = "file descriptor of which a duplicate is added to fd set",
+ },{
+ .name = "set",
+ .type = QEMU_OPT_NUMBER,
+ .help = "ID of the fd set to add fd to",
+ },{
+ .name = "opaque",
+ .type = QEMU_OPT_STRING,
+ .help = "free-form string used to describe fd",
+ },
+ { /* end of list */ }
+ },
+};
+
+static QemuOptsList qemu_object_opts = {
+ .name = "object",
+ .implied_opt_name = "qom-type",
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head),
+ .desc = {
+ { }
+ },
+};
+
+static QemuOptsList qemu_tpmdev_opts = {
+ .name = "tpmdev",
+ .implied_opt_name = "type",
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_tpmdev_opts.head),
+ .desc = {
+ /* options are defined in the TPM backends */
+ { /* end of list */ }
+ },
+};
+
+static QemuOptsList qemu_overcommit_opts = {
+ .name = "overcommit",
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_overcommit_opts.head),
+ .desc = {
+ {
+ .name = "mem-lock",
+ .type = QEMU_OPT_BOOL,
+ },
+ {
+ .name = "cpu-pm",
+ .type = QEMU_OPT_BOOL,
+ },
+ { /* end of list */ }
+ },
+};
+
+static QemuOptsList qemu_msg_opts = {
+ .name = "msg",
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_msg_opts.head),
+ .desc = {
+ {
+ .name = "timestamp",
+ .type = QEMU_OPT_BOOL,
+ },
+ {
+ .name = "guest-name",
+ .type = QEMU_OPT_BOOL,
+ .help = "Prepends guest name for error messages but only if "
+ "-name guest is set otherwise option is ignored\n",
+ },
+ { /* end of list */ }
+ },
+};
+
+static QemuOptsList qemu_name_opts = {
+ .name = "name",
+ .implied_opt_name = "guest",
+ .merge_lists = true,
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_name_opts.head),
+ .desc = {
+ {
+ .name = "guest",
+ .type = QEMU_OPT_STRING,
+ .help = "Sets the name of the guest.\n"
+ "This name will be displayed in the SDL window caption.\n"
+ "The name will also be used for the VNC server",
+ }, {
+ .name = "process",
+ .type = QEMU_OPT_STRING,
+ .help = "Sets the name of the QEMU process, as shown in top etc",
+ }, {
+ .name = "debug-threads",
+ .type = QEMU_OPT_BOOL,
+ .help = "When enabled, name the individual threads; defaults off.\n"
+ "NOTE: The thread names are for debugging and not a\n"
+ "stable API.",
+ },
+ { /* End of list */ }
+ },
+};
+
+static QemuOptsList qemu_mem_opts = {
+ .name = "memory",
+ .implied_opt_name = "size",
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_mem_opts.head),
+ .merge_lists = true,
+ .desc = {
+ {
+ .name = "size",
+ .type = QEMU_OPT_SIZE,
+ },
+ {
+ .name = "slots",
+ .type = QEMU_OPT_NUMBER,
+ },
+ {
+ .name = "maxmem",
+ .type = QEMU_OPT_SIZE,
+ },
+ { /* end of list */ }
+ },
+};
+
+static QemuOptsList qemu_icount_opts = {
+ .name = "icount",
+ .implied_opt_name = "shift",
+ .merge_lists = true,
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_icount_opts.head),
+ .desc = {
+ {
+ .name = "shift",
+ .type = QEMU_OPT_STRING,
+ }, {
+ .name = "align",
+ .type = QEMU_OPT_BOOL,
+ }, {
+ .name = "sleep",
+ .type = QEMU_OPT_BOOL,
+ }, {
+ .name = "rr",
+ .type = QEMU_OPT_STRING,
+ }, {
+ .name = "rrfile",
+ .type = QEMU_OPT_STRING,
+ }, {
+ .name = "rrsnapshot",
+ .type = QEMU_OPT_STRING,
+ },
+ { /* end of list */ }
+ },
+};
+
+static QemuOptsList qemu_fw_cfg_opts = {
+ .name = "fw_cfg",
+ .implied_opt_name = "name",
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_fw_cfg_opts.head),
+ .desc = {
+ {
+ .name = "name",
+ .type = QEMU_OPT_STRING,
+ .help = "Sets the fw_cfg name of the blob to be inserted",
+ }, {
+ .name = "file",
+ .type = QEMU_OPT_STRING,
+ .help = "Sets the name of the file from which "
+ "the fw_cfg blob will be loaded",
+ }, {
+ .name = "string",
+ .type = QEMU_OPT_STRING,
+ .help = "Sets content of the blob to be inserted from a string",
+ }, {
+ .name = "gen_id",
+ .type = QEMU_OPT_STRING,
+ .help = "Sets id of the object generating the fw_cfg blob "
+ "to be inserted",
+ },
+ { /* end of list */ }
+ },
+};
+
+static QemuOptsList qemu_action_opts = {
+ .name = "action",
+ .merge_lists = true,
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_action_opts.head),
+ .desc = {
+ {
+ .name = "shutdown",
+ .type = QEMU_OPT_STRING,
+ },{
+ .name = "reboot",
+ .type = QEMU_OPT_STRING,
+ },{
+ .name = "panic",
+ .type = QEMU_OPT_STRING,
+ },{
+ .name = "watchdog",
+ .type = QEMU_OPT_STRING,
+ },
+ { /* end of list */ }
+ },
+};
+
+const char *qemu_get_vm_name(void)
+{
+ return qemu_name;
+}
+
+static void default_driver_disable(const char *driver)
+{
+ int i;
+
+ if (!driver) {
+ return;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(default_list); i++) {
+        if (strcmp(default_list[i].driver, driver) != 0) {
+            continue;
+        }
+ *(default_list[i].flag) = 0;
+ }
+}
+
+static int default_driver_check(void *opaque, QemuOpts *opts, Error **errp)
+{
+ const char *driver = qemu_opt_get(opts, "driver");
+
+ default_driver_disable(driver);
+ return 0;
+}
+
+static void default_driver_check_json(void)
+{
+ DeviceOption *opt;
+
+ QTAILQ_FOREACH(opt, &device_opts, next) {
+ const char *driver = qdict_get_try_str(opt->opts, "driver");
+ default_driver_disable(driver);
+ }
+}
+
+static int parse_name(void *opaque, QemuOpts *opts, Error **errp)
+{
+ const char *proc_name;
+
+ if (qemu_opt_get(opts, "debug-threads")) {
+ qemu_thread_naming(qemu_opt_get_bool(opts, "debug-threads", false));
+ }
+ qemu_name = qemu_opt_get(opts, "guest");
+
+ proc_name = qemu_opt_get(opts, "process");
+ if (proc_name) {
+ os_set_proc_name(proc_name);
+ }
+
+ return 0;
+}
+
+bool defaults_enabled(void)
+{
+ return has_defaults;
+}
+
+#ifndef _WIN32
+static int parse_add_fd(void *opaque, QemuOpts *opts, Error **errp)
+{
+ int fd, dupfd, flags;
+ int64_t fdset_id;
+ const char *fd_opaque = NULL;
+ AddfdInfo *fdinfo;
+
+ fd = qemu_opt_get_number(opts, "fd", -1);
+ fdset_id = qemu_opt_get_number(opts, "set", -1);
+ fd_opaque = qemu_opt_get(opts, "opaque");
+
+ if (fd < 0) {
+ error_setg(errp, "fd option is required and must be non-negative");
+ return -1;
+ }
+
+ if (fd <= STDERR_FILENO) {
+ error_setg(errp, "fd cannot be a standard I/O stream");
+ return -1;
+ }
+
+ /*
+ * All fds inherited across exec() necessarily have FD_CLOEXEC
+ * clear, while qemu sets FD_CLOEXEC on all other fds used internally.
+ */
+ flags = fcntl(fd, F_GETFD);
+ if (flags == -1 || (flags & FD_CLOEXEC)) {
+ error_setg(errp, "fd is not valid or already in use");
+ return -1;
+ }
+
+ if (fdset_id < 0) {
+ error_setg(errp, "set option is required and must be non-negative");
+ return -1;
+ }
+
+#ifdef F_DUPFD_CLOEXEC
+ dupfd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
+#else
+ dupfd = dup(fd);
+ if (dupfd != -1) {
+ qemu_set_cloexec(dupfd);
+ }
+#endif
+ if (dupfd == -1) {
+ error_setg(errp, "error duplicating fd: %s", strerror(errno));
+ return -1;
+ }
+
+ /* add the duplicate fd, and optionally the opaque string, to the fd set */
+ fdinfo = monitor_fdset_add_fd(dupfd, true, fdset_id, fd_opaque,
+ &error_abort);
+ g_free(fdinfo);
+
+ return 0;
+}
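+
+/*
+ * Command-line sketch (illustrative; the path is hypothetical): place a
+ * duplicate of inherited fd 3 into fd set 2:
+ *
+ *     qemu-system-x86_64 ... \
+ *         -add-fd fd=3,set=2,opaque="rdwr:/path/to/file" 3<>/path/to/file
+ */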
+
+static int cleanup_add_fd(void *opaque, QemuOpts *opts, Error **errp)
+{
+ int fd;
+
+ fd = qemu_opt_get_number(opts, "fd", -1);
+ close(fd);
+
+ return 0;
+}
+#endif
+
+/***********************************************************/
+/* QEMU Block devices */
+
+#define HD_OPTS "media=disk"
+#define CDROM_OPTS "media=cdrom"
+#define FD_OPTS ""
+#define PFLASH_OPTS ""
+#define MTD_OPTS ""
+#define SD_OPTS ""
+
+static int drive_init_func(void *opaque, QemuOpts *opts, Error **errp)
+{
+ BlockInterfaceType *block_default_type = opaque;
+
+ return drive_new(opts, *block_default_type, errp) == NULL;
+}
+
+static int drive_enable_snapshot(void *opaque, QemuOpts *opts, Error **errp)
+{
+ if (qemu_opt_get(opts, "snapshot") == NULL) {
+ qemu_opt_set(opts, "snapshot", "on", &error_abort);
+ }
+ return 0;
+}
+
+static void default_drive(int enable, int snapshot, BlockInterfaceType type,
+ int index, const char *optstr)
+{
+ QemuOpts *opts;
+ DriveInfo *dinfo;
+
+ if (!enable || drive_get_by_index(type, index)) {
+ return;
+ }
+
+ opts = drive_add(type, index, NULL, optstr);
+ if (snapshot) {
+ drive_enable_snapshot(NULL, opts, NULL);
+ }
+
+ dinfo = drive_new(opts, type, &error_abort);
+ dinfo->is_default = true;
+}
+
+static void configure_blockdev(BlockdevOptionsQueue *bdo_queue,
+ MachineClass *machine_class, int snapshot)
+{
+ /*
+ * If the currently selected machine wishes to override the
+ * units-per-bus property of its default HBA interface type, do so
+ * now.
+ */
+ if (machine_class->units_per_default_bus) {
+ override_max_devs(machine_class->block_default_type,
+ machine_class->units_per_default_bus);
+ }
+
+ /* open the virtual block devices */
+ while (!QSIMPLEQ_EMPTY(bdo_queue)) {
+ BlockdevOptionsQueueEntry *bdo = QSIMPLEQ_FIRST(bdo_queue);
+
+ QSIMPLEQ_REMOVE_HEAD(bdo_queue, entry);
+ loc_push_restore(&bdo->loc);
+ qmp_blockdev_add(bdo->bdo, &error_fatal);
+ loc_pop(&bdo->loc);
+ qapi_free_BlockdevOptions(bdo->bdo);
+ g_free(bdo);
+ }
+ if (snapshot) {
+ qemu_opts_foreach(qemu_find_opts("drive"), drive_enable_snapshot,
+ NULL, NULL);
+ }
+ if (qemu_opts_foreach(qemu_find_opts("drive"), drive_init_func,
+ &machine_class->block_default_type, &error_fatal)) {
+ /* We printed help */
+ exit(0);
+ }
+
+ default_drive(default_cdrom, snapshot, machine_class->block_default_type, 2,
+ CDROM_OPTS);
+ default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS);
+ default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS);
+}
+
+static QemuOptsList qemu_smp_opts = {
+ .name = "smp-opts",
+ .implied_opt_name = "cpus",
+ .merge_lists = true,
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_smp_opts.head),
+ .desc = {
+ {
+ .name = "cpus",
+ .type = QEMU_OPT_NUMBER,
+ }, {
+ .name = "sockets",
+ .type = QEMU_OPT_NUMBER,
+ }, {
+ .name = "dies",
+ .type = QEMU_OPT_NUMBER,
+ }, {
+ .name = "clusters",
+ .type = QEMU_OPT_NUMBER,
+ }, {
+ .name = "cores",
+ .type = QEMU_OPT_NUMBER,
+ }, {
+ .name = "threads",
+ .type = QEMU_OPT_NUMBER,
+ }, {
+ .name = "maxcpus",
+ .type = QEMU_OPT_NUMBER,
+ },
+ { /*End of list */ }
+ },
+};
+
+#if defined(CONFIG_POSIX)
+static QemuOptsList qemu_run_with_opts = {
+ .name = "run-with",
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_run_with_opts.head),
+ .desc = {
+#if defined(CONFIG_LINUX)
+ {
+ .name = "async-teardown",
+ .type = QEMU_OPT_BOOL,
+ },
+#endif
+ {
+ .name = "chroot",
+ .type = QEMU_OPT_STRING,
+ },
+ { /* end of list */ }
+ },
+};
+
+#define qemu_add_run_with_opts() qemu_add_opts(&qemu_run_with_opts)
+
+#else
+
+#define qemu_add_run_with_opts()
+
+#endif /* CONFIG_POSIX */
+
+static void realtime_init(void)
+{
+ if (enable_mlock) {
+ if (os_mlock() < 0) {
+ error_report("locking memory failed");
+ exit(1);
+ }
+ }
+}
+
+static void configure_msg(QemuOpts *opts)
+{
+ message_with_timestamp = qemu_opt_get_bool(opts, "timestamp", false);
+ error_with_guestname = qemu_opt_get_bool(opts, "guest-name", false);
+}
+
+/***********************************************************/
+/* USB devices */
+
+static int usb_device_add(const char *devname)
+{
+ USBDevice *dev = NULL;
+
+ if (!machine_usb(current_machine)) {
+ return -1;
+ }
+
+ dev = usbdevice_create(devname);
+    if (!dev) {
+        return -1;
+    }
+
+ return 0;
+}
+
+static int usb_parse(const char *cmdline)
+{
+ int r;
+ r = usb_device_add(cmdline);
+ if (r < 0) {
+ error_report("could not add USB device '%s'", cmdline);
+ }
+ return r;
+}
+
+/***********************************************************/
+/* machine registration */
+
+static MachineClass *find_machine(const char *name, GSList *machines)
+{
+ GSList *el;
+
+ for (el = machines; el; el = el->next) {
+ MachineClass *mc = el->data;
+
+ if (!strcmp(mc->name, name) || !g_strcmp0(mc->alias, name)) {
+ return mc;
+ }
+ }
+
+ return NULL;
+}
+
+static MachineClass *find_default_machine(GSList *machines)
+{
+ GSList *el;
+ MachineClass *default_machineclass = NULL;
+
+ for (el = machines; el; el = el->next) {
+ MachineClass *mc = el->data;
+
+ if (mc->is_default) {
+ assert(default_machineclass == NULL && "Multiple default machines");
+ default_machineclass = mc;
+ }
+ }
+
+ return default_machineclass;
+}
+
+static void version(void)
+{
+ printf("QEMU emulator version " QEMU_FULL_VERSION "\n"
+ QEMU_COPYRIGHT "\n");
+}
+
+static void help(int exitcode)
+{
+ version();
+ printf("usage: %s [options] [disk_image]\n\n"
+ "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n",
+ g_get_prgname());
+
+#define DEF(option, opt_arg, opt_enum, opt_help, arch_mask) \
+ if ((arch_mask) & arch_type) \
+ fputs(opt_help, stdout);
+
+#define ARCHHEADING(text, arch_mask) \
+ if ((arch_mask) & arch_type) \
+ puts(stringify(text));
+
+#define DEFHEADING(text) ARCHHEADING(text, QEMU_ARCH_ALL)
+
+#include "qemu-options.def"
+
+ printf("\nDuring emulation, the following keys are useful:\n"
+ "ctrl-alt-f toggle full screen\n"
+ "ctrl-alt-n switch to virtual console 'n'\n"
+ "ctrl-alt toggle mouse and keyboard grab\n"
+ "\n"
+ "When using -nographic, press 'ctrl-a h' to get some help.\n"
+ "\n"
+ QEMU_HELP_BOTTOM "\n");
+
+ exit(exitcode);
+}
+
+enum {
+
+#define DEF(option, opt_arg, opt_enum, opt_help, arch_mask) \
+ opt_enum,
+#define DEFHEADING(text)
+#define ARCHHEADING(text, arch_mask)
+
+#include "qemu-options.def"
+};
+
+#define HAS_ARG 0x0001
+
+typedef struct QEMUOption {
+ const char *name;
+ int flags;
+ int index;
+ uint32_t arch_mask;
+} QEMUOption;
+
+static const QEMUOption qemu_options[] = {
+ { "h", 0, QEMU_OPTION_h, QEMU_ARCH_ALL },
+
+#define DEF(option, opt_arg, opt_enum, opt_help, arch_mask) \
+ { option, opt_arg, opt_enum, arch_mask },
+#define DEFHEADING(text)
+#define ARCHHEADING(text, arch_mask)
+
+#include "qemu-options.def"
+ { /* end of list */ }
+};
+
+typedef struct VGAInterfaceInfo {
+ const char *opt_name; /* option name */
+ const char *name; /* human-readable name */
+ /* Class names indicating that support is available.
+ * If no class is specified, the interface is always available */
+ const char *class_names[2];
+} VGAInterfaceInfo;
+
+static const VGAInterfaceInfo vga_interfaces[VGA_TYPE_MAX] = {
+ [VGA_NONE] = {
+ .opt_name = "none",
+ .name = "no graphic card",
+ },
+ [VGA_STD] = {
+ .opt_name = "std",
+ .name = "standard VGA",
+ .class_names = { "VGA", "isa-vga" },
+ },
+ [VGA_CIRRUS] = {
+ .opt_name = "cirrus",
+ .name = "Cirrus VGA",
+ .class_names = { "cirrus-vga", "isa-cirrus-vga" },
+ },
+ [VGA_VMWARE] = {
+ .opt_name = "vmware",
+ .name = "VMWare SVGA",
+ .class_names = { "vmware-svga" },
+ },
+ [VGA_VIRTIO] = {
+ .opt_name = "virtio",
+ .name = "Virtio VGA",
+ .class_names = { "virtio-vga" },
+ },
+ [VGA_QXL] = {
+ .opt_name = "qxl",
+ .name = "QXL VGA",
+ .class_names = { "qxl-vga" },
+ },
+ [VGA_TCX] = {
+ .opt_name = "tcx",
+ .name = "TCX framebuffer",
+ .class_names = { "sun-tcx" },
+ },
+ [VGA_CG3] = {
+ .opt_name = "cg3",
+ .name = "CG3 framebuffer",
+ .class_names = { "cgthree" },
+ },
+#ifdef CONFIG_XEN_BACKEND
+ [VGA_XENFB] = {
+ .opt_name = "xenfb",
+ .name = "Xen paravirtualized framebuffer",
+ },
+#endif
+};
+
+static bool vga_interface_available(VGAInterfaceType t)
+{
+ const VGAInterfaceInfo *ti = &vga_interfaces[t];
+
+ assert(t < VGA_TYPE_MAX);
+ return !ti->class_names[0] ||
+ module_object_class_by_name(ti->class_names[0]) ||
+ module_object_class_by_name(ti->class_names[1]);
+}
+
+static const char *
+get_default_vga_model(const MachineClass *machine_class)
+{
+ if (machine_class->default_display) {
+ for (int t = 0; t < VGA_TYPE_MAX; t++) {
+ const VGAInterfaceInfo *ti = &vga_interfaces[t];
+
+ if (ti->opt_name && vga_interface_available(t) &&
+ g_str_equal(ti->opt_name, machine_class->default_display)) {
+ return machine_class->default_display;
+ }
+ }
+
+ warn_report_once("Default display '%s' is not available in this binary",
+ machine_class->default_display);
+ return NULL;
+ } else if (vga_interface_available(VGA_CIRRUS)) {
+ return "cirrus";
+ } else if (vga_interface_available(VGA_STD)) {
+ return "std";
+ }
+
+ return NULL;
+}
+
+static void select_vgahw(const MachineClass *machine_class, const char *p)
+{
+ const char *opts;
+ int t;
+
+ if (g_str_equal(p, "help")) {
+ const char *def = get_default_vga_model(machine_class);
+
+ for (t = 0; t < VGA_TYPE_MAX; t++) {
+ const VGAInterfaceInfo *ti = &vga_interfaces[t];
+
+ if (vga_interface_available(t) && ti->opt_name) {
+ printf("%-20s %s%s\n", ti->opt_name, ti->name ?: "",
+ (def && g_str_equal(ti->opt_name, def)) ?
+ " (default)" : "");
+ }
+ }
+ exit(0);
+ }
+
+ assert(vga_interface_type == VGA_NONE);
+ for (t = 0; t < VGA_TYPE_MAX; t++) {
+ const VGAInterfaceInfo *ti = &vga_interfaces[t];
+ if (ti->opt_name && strstart(p, ti->opt_name, &opts)) {
+ if (!vga_interface_available(t)) {
+ error_report("%s not available", ti->name);
+ exit(1);
+ }
+ vga_interface_type = t;
+ break;
+ }
+ }
+ if (t == VGA_TYPE_MAX) {
+ invalid_vga:
+ error_report("unknown vga type: %s", p);
+ exit(1);
+ }
+ while (*opts) {
+ const char *nextopt;
+
+ if (strstart(opts, ",retrace=", &nextopt)) {
+ opts = nextopt;
+ if (strstart(opts, "dumb", &nextopt))
+ vga_retrace_method = VGA_RETRACE_DUMB;
+ else if (strstart(opts, "precise", &nextopt))
+ vga_retrace_method = VGA_RETRACE_PRECISE;
+ else goto invalid_vga;
+ } else goto invalid_vga;
+ opts = nextopt;
+ }
+}
+
+static void parse_display_qapi(const char *optarg)
+{
+ DisplayOptions *opts;
+ Visitor *v;
+
+ v = qobject_input_visitor_new_str(optarg, "type", &error_fatal);
+
+ visit_type_DisplayOptions(v, NULL, &opts, &error_fatal);
+ QAPI_CLONE_MEMBERS(DisplayOptions, &dpy, opts);
+
+ qapi_free_DisplayOptions(opts);
+ visit_free(v);
+}
+
+DisplayOptions *qmp_query_display_options(Error **errp)
+{
+ return QAPI_CLONE(DisplayOptions, &dpy);
+}
+
+static void parse_display(const char *p)
+{
+ const char *opts;
+
+ if (is_help_option(p)) {
+ qemu_display_help();
+ exit(0);
+ }
+
+ if (strstart(p, "vnc", &opts)) {
+ /*
+ * vnc isn't a (local) DisplayType but a protocol for remote
+ * display access.
+ */
+ if (*opts == '=') {
+ vnc_parse(opts + 1);
+ } else {
+ error_report("VNC requires a display argument vnc=<display>");
+ exit(1);
+ }
+ } else {
+ parse_display_qapi(p);
+ }
+}
+
+static inline bool nonempty_str(const char *str)
+{
+ return str && *str;
+}
+
+static int parse_fw_cfg(void *opaque, QemuOpts *opts, Error **errp)
+{
+ gchar *buf;
+ size_t size;
+ const char *name, *file, *str, *gen_id;
+ FWCfgState *fw_cfg = (FWCfgState *) opaque;
+
+ if (fw_cfg == NULL) {
+ error_setg(errp, "fw_cfg device not available");
+ return -1;
+ }
+ name = qemu_opt_get(opts, "name");
+ file = qemu_opt_get(opts, "file");
+ str = qemu_opt_get(opts, "string");
+ gen_id = qemu_opt_get(opts, "gen_id");
+
+ /* we need the name, and exactly one of: file, content string, gen_id */
+ if (!nonempty_str(name) ||
+ nonempty_str(file) + nonempty_str(str) + nonempty_str(gen_id) != 1) {
+ error_setg(errp, "name, plus exactly one of file,"
+ " string and gen_id, are needed");
+ return -1;
+ }
+ if (strlen(name) > FW_CFG_MAX_FILE_PATH - 1) {
+ error_setg(errp, "name too long (max. %d char)",
+ FW_CFG_MAX_FILE_PATH - 1);
+ return -1;
+ }
+ if (nonempty_str(gen_id)) {
+ /*
+ * In this particular case where the content is populated
+ * internally, the "etc/" namespace protection is relaxed,
+ * so do not emit a warning.
+ */
+ } else if (strncmp(name, "opt/", 4) != 0) {
+ warn_report("externally provided fw_cfg item names "
+ "should be prefixed with \"opt/\"");
+ }
+ if (nonempty_str(str)) {
+ size = strlen(str); /* NUL terminator NOT included in fw_cfg blob */
+ buf = g_memdup(str, size);
+ } else if (nonempty_str(gen_id)) {
+ if (!fw_cfg_add_from_generator(fw_cfg, name, gen_id, errp)) {
+ return -1;
+ }
+ return 0;
+ } else {
+ GError *err = NULL;
+ if (!g_file_get_contents(file, &buf, &size, &err)) {
+ error_setg(errp, "can't load %s: %s", file, err->message);
+ g_error_free(err);
+ return -1;
+ }
+ }
+ /* For legacy, keep user files in a specific global order. */
+ fw_cfg_set_order_override(fw_cfg, FW_CFG_ORDER_OVERRIDE_USER);
+ fw_cfg_add_file(fw_cfg, name, buf, size);
+ fw_cfg_reset_order_override(fw_cfg);
+ return 0;
+}
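+
+/*
+ * Command-line sketch (illustrative; the item name is hypothetical):
+ *
+ *     -fw_cfg name=opt/com.example/greeting,string=hello
+ *
+ * inserts a 5-byte blob (the NUL terminator is not included) that guest
+ * firmware can read back by name.
+ */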
+
+static int device_help_func(void *opaque, QemuOpts *opts, Error **errp)
+{
+ return qdev_device_help(opts);
+}
+
+static int device_init_func(void *opaque, QemuOpts *opts, Error **errp)
+{
+ DeviceState *dev;
+
+ dev = qdev_device_add(opts, errp);
+ if (!dev && *errp) {
+ error_report_err(*errp);
+ return -1;
+ } else if (dev) {
+ object_unref(OBJECT(dev));
+ }
+ return 0;
+}
+
+static int chardev_init_func(void *opaque, QemuOpts *opts, Error **errp)
+{
+ Error *local_err = NULL;
+
+ if (!qemu_chr_new_from_opts(opts, NULL, &local_err)) {
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return -1;
+ }
+ exit(0);
+ }
+ return 0;
+}
+
+#ifdef CONFIG_VIRTFS
+static int fsdev_init_func(void *opaque, QemuOpts *opts, Error **errp)
+{
+ return qemu_fsdev_add(opts, errp);
+}
+#endif
+
+static int mon_init_func(void *opaque, QemuOpts *opts, Error **errp)
+{
+ return monitor_init_opts(opts, errp);
+}
+
+static void monitor_parse(const char *optarg, const char *mode, bool pretty)
+{
+ static int monitor_device_index = 0;
+ QemuOpts *opts;
+ const char *p;
+ char label[32];
+
+ if (strstart(optarg, "chardev:", &p)) {
+ snprintf(label, sizeof(label), "%s", p);
+ } else {
+ snprintf(label, sizeof(label), "compat_monitor%d",
+ monitor_device_index);
+ opts = qemu_chr_parse_compat(label, optarg, true);
+ if (!opts) {
+ error_report("parse error: %s", optarg);
+ exit(1);
+ }
+ }
+
+ opts = qemu_opts_create(qemu_find_opts("mon"), label, 1, &error_fatal);
+ qemu_opt_set(opts, "mode", mode, &error_abort);
+ qemu_opt_set(opts, "chardev", label, &error_abort);
+ if (!strcmp(mode, "control")) {
+ qemu_opt_set_bool(opts, "pretty", pretty, &error_abort);
+ } else {
+ assert(pretty == false);
+ }
+ monitor_device_index++;
+}
+
+struct device_config {
+ enum {
+ DEV_USB, /* -usbdevice */
+ DEV_SERIAL, /* -serial */
+ DEV_PARALLEL, /* -parallel */
+ DEV_DEBUGCON, /* -debugcon */
+ DEV_GDB, /* -gdb, -s */
+ DEV_SCLP, /* s390 sclp */
+ } type;
+ const char *cmdline;
+ Location loc;
+ QTAILQ_ENTRY(device_config) next;
+};
+
+static QTAILQ_HEAD(, device_config) device_configs =
+ QTAILQ_HEAD_INITIALIZER(device_configs);
+
+static void add_device_config(int type, const char *cmdline)
+{
+ struct device_config *conf;
+
+ conf = g_malloc0(sizeof(*conf));
+ conf->type = type;
+ conf->cmdline = cmdline;
+ loc_save(&conf->loc);
+ QTAILQ_INSERT_TAIL(&device_configs, conf, next);
+}
+
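+/*
+ * Run @func for every queued config of the given @type (-serial,
+ * -parallel, ...), restoring each option's original command-line
+ * location for error reporting; stops at the first non-zero return.
+ */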
+static int foreach_device_config(int type, int (*func)(const char *cmdline))
+{
+ struct device_config *conf;
+ int rc;
+
+ QTAILQ_FOREACH(conf, &device_configs, next) {
+ if (conf->type != type) {
+ continue;
+ }
+ loc_push_restore(&conf->loc);
+ rc = func(conf->cmdline);
+ loc_pop(&conf->loc);
+ if (rc) {
+ return rc;
+ }
+ }
+ return 0;
+}
+
+static void qemu_disable_default_devices(void)
+{
+ MachineClass *machine_class = MACHINE_GET_CLASS(current_machine);
+
+ default_driver_check_json();
+ qemu_opts_foreach(qemu_find_opts("device"),
+ default_driver_check, NULL, NULL);
+ qemu_opts_foreach(qemu_find_opts("global"),
+ default_driver_check, NULL, NULL);
+
+ if (!vga_model && !default_vga) {
+ vga_interface_type = VGA_DEVICE;
+ vga_interface_created = true;
+ }
+ if (!has_defaults || machine_class->no_serial) {
+ default_serial = 0;
+ }
+ if (!has_defaults || machine_class->no_parallel) {
+ default_parallel = 0;
+ }
+ if (!has_defaults || machine_class->no_floppy) {
+ default_floppy = 0;
+ }
+ if (!has_defaults || machine_class->no_cdrom) {
+ default_cdrom = 0;
+ }
+ if (!has_defaults || machine_class->no_sdcard) {
+ default_sdcard = 0;
+ }
+ if (!has_defaults) {
+ default_monitor = 0;
+ default_net = 0;
+ default_vga = 0;
+ } else {
+ if (default_net && machine_class->default_nic &&
+ !module_object_class_by_name(machine_class->default_nic)) {
+ warn_report("Default NIC '%s' is not available in this binary",
+ machine_class->default_nic);
+ default_net = 0;
+ }
+ }
+}
+
+static void qemu_create_default_devices(void)
+{
+ MachineClass *machine_class = MACHINE_GET_CLASS(current_machine);
+
+ if (is_daemonized()) {
+ /*
+ * According to documentation and historically, -nographic redirects
+ * serial port, parallel port and monitor to stdio, which does not work
+ * with -daemonize. We can redirect these to null instead, but since
+ * -nographic is legacy, let's just error out.
+ * We disallow -nographic only if all other ports are not redirected
+ * explicitly, to avoid breaking existing legacy setups that use
+ * -nographic _and_ redirect all ports explicitly - this is valid
+ * usage, -nographic is just a no-op in this case.
+ */
+ if (nographic
+ && (default_parallel || default_serial || default_monitor)) {
+ error_report("-nographic cannot be used with -daemonize");
+ exit(1);
+ }
+ }
+
+ if (nographic) {
+ if (default_parallel) {
+ add_device_config(DEV_PARALLEL, "null");
+ }
+ if (default_serial && default_monitor) {
+ add_device_config(DEV_SERIAL, "mon:stdio");
+ } else {
+ if (default_serial) {
+ add_device_config(DEV_SERIAL, "stdio");
+ }
+ if (default_monitor) {
+ monitor_parse("stdio", "readline", false);
+ }
+ }
+ } else {
+ if (default_serial) {
+ add_device_config(DEV_SERIAL, "vc:80Cx24C");
+ }
+ if (default_parallel) {
+ add_device_config(DEV_PARALLEL, "vc:80Cx24C");
+ }
+ if (default_monitor) {
+ monitor_parse("vc:80Cx24C", "readline", false);
+ }
+ }
+
+ if (default_net) {
+ QemuOptsList *net = qemu_find_opts("net");
+ qemu_opts_parse(net, "nic", true, &error_abort);
+#ifdef CONFIG_SLIRP
+ qemu_opts_parse(net, "user", true, &error_abort);
+#endif
+ }
+
+#if defined(CONFIG_VNC)
+ if (!QTAILQ_EMPTY(&(qemu_find_opts("vnc")->head))) {
+ display_remote++;
+ }
+#endif
+ if (dpy.type == DISPLAY_TYPE_DEFAULT && !display_remote) {
+ if (!qemu_display_find_default(&dpy)) {
+ dpy.type = DISPLAY_TYPE_NONE;
+#if defined(CONFIG_VNC)
+ vnc_parse("localhost:0,to=99,id=default");
+#endif
+ }
+ }
+ if (dpy.type == DISPLAY_TYPE_DEFAULT) {
+ dpy.type = DISPLAY_TYPE_NONE;
+ }
+
+ /* If no default VGA is requested, the default is "none". */
+ if (default_vga) {
+ vga_model = get_default_vga_model(machine_class);
+ }
+ if (vga_model) {
+ select_vgahw(machine_class, vga_model);
+ }
+}
+
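+/* Connect one -serial argument to a chardev; "-serial none" is a no-op. */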
+static int serial_parse(const char *devname)
+{
+ int index = num_serial_hds;
+ char label[32];
+
+ if (strcmp(devname, "none") == 0)
+ return 0;
+ snprintf(label, sizeof(label), "serial%d", index);
+ serial_hds = g_renew(Chardev *, serial_hds, index + 1);
+
+ serial_hds[index] = qemu_chr_new_mux_mon(label, devname, NULL);
+ if (!serial_hds[index]) {
+ error_report("could not connect serial device"
+ " to character backend '%s'", devname);
+ return -1;
+ }
+ num_serial_hds++;
+ return 0;
+}
+
+Chardev *serial_hd(int i)
+{
+ assert(i >= 0);
+ if (i < num_serial_hds) {
+ return serial_hds[i];
+ }
+ return NULL;
+}
+
+static int parallel_parse(const char *devname)
+{
+ static int index = 0;
+ char label[32];
+
+ if (strcmp(devname, "none") == 0)
+ return 0;
+ if (index == MAX_PARALLEL_PORTS) {
+ error_report("too many parallel ports");
+ exit(1);
+ }
+ snprintf(label, sizeof(label), "parallel%d", index);
+ parallel_hds[index] = qemu_chr_new_mux_mon(label, devname, NULL);
+ if (!parallel_hds[index]) {
+ error_report("could not connect parallel device"
+ " to character backend '%s'", devname);
+ return -1;
+ }
+ index++;
+ return 0;
+}
+
+static int debugcon_parse(const char *devname)
+{
+ QemuOpts *opts;
+
+ if (!qemu_chr_new_mux_mon("debugcon", devname, NULL)) {
+ error_report("invalid character backend '%s'", devname);
+ exit(1);
+ }
+ opts = qemu_opts_create(qemu_find_opts("device"), "debugcon", 1, NULL);
+ if (!opts) {
+ error_report("already have a debugcon device");
+ exit(1);
+ }
+ qemu_opt_set(opts, "driver", "isa-debugcon", &error_abort);
+ qemu_opt_set(opts, "chardev", "debugcon", &error_abort);
+ return 0;
+}
+
+static gint machine_class_cmp(gconstpointer a, gconstpointer b)
+{
+ const MachineClass *mc1 = a, *mc2 = b;
+ int res;
+
+ if (mc1->family == NULL) {
+ if (mc2->family == NULL) {
+ /* Compare standalone machine types against each other; they sort
+ * in increasing order.
+ */
+ return strcmp(object_class_get_name(OBJECT_CLASS(mc1)),
+ object_class_get_name(OBJECT_CLASS(mc2)));
+ }
+
+ /* Standalone machine types sort after families. */
+ return 1;
+ }
+
+ if (mc2->family == NULL) {
+ /* Families sort before standalone machine types. */
+ return -1;
+ }
+
+ /* Families sort between each other alphabetically increasingly. */
+ res = strcmp(mc1->family, mc2->family);
+ if (res != 0) {
+ return res;
+ }
+
+ /* Within the same family, machine types sort in decreasing order. */
+ return strcmp(object_class_get_name(OBJECT_CLASS(mc2)),
+ object_class_get_name(OBJECT_CLASS(mc1)));
+}
+
+static void machine_help_func(const QDict *qdict)
+{
+ GSList *machines, *el;
+ const char *type = qdict_get_try_str(qdict, "type");
+
+ machines = object_class_get_list(TYPE_MACHINE, false);
+ if (type) {
+ ObjectClass *machine_class = OBJECT_CLASS(find_machine(type, machines));
+ if (machine_class) {
+ type_print_class_properties(object_class_get_name(machine_class));
+ return;
+ }
+ }
+
+ printf("Supported machines are:\n");
+ machines = g_slist_sort(machines, machine_class_cmp);
+ for (el = machines; el; el = el->next) {
+ MachineClass *mc = el->data;
+ if (mc->alias) {
+ printf("%-20s %s (alias of %s)\n", mc->alias, mc->desc, mc->name);
+ }
+ printf("%-20s %s%s%s\n", mc->name, mc->desc,
+ mc->is_default ? " (default)" : "",
+ mc->deprecation_reason ? " (deprecated)" : "");
+ }
+}
+
+static void
+machine_merge_property(const char *propname, QDict *prop, Error **errp)
+{
+ QDict *opts;
+
+ opts = qdict_new();
+ /* Preserve the caller's reference to prop. */
+ qobject_ref(prop);
+ qdict_put(opts, propname, prop);
+ keyval_merge(machine_opts_dict, opts, errp);
+ qobject_unref(opts);
+}
+
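+/*
+ * Parse a -smp/-boot style argument and merge it into the machine
+ * options dictionary as property @propname, e.g. "-smp 4,sockets=2"
+ * becomes the "smp" sub-dict ("sockets" shown for illustration).
+ */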
+static void
+machine_parse_property_opt(QemuOptsList *opts_list, const char *propname,
+ const char *arg)
+{
+ QDict *prop = NULL;
+ bool help = false;
+
+ prop = keyval_parse(arg, opts_list->implied_opt_name, &help, &error_fatal);
+ if (help) {
+ qemu_opts_print_help(opts_list, true);
+ exit(0);
+ }
+ machine_merge_property(propname, prop, &error_fatal);
+ qobject_unref(prop);
+}
+
+static const char *pid_file;
+struct UnlinkPidfileNotifier {
+ Notifier notifier;
+ char *pid_file_realpath;
+};
+static struct UnlinkPidfileNotifier qemu_unlink_pidfile_notifier;
+
+static void qemu_unlink_pidfile(Notifier *n, void *data)
+{
+ struct UnlinkPidfileNotifier *upn;
+
+ upn = DO_UPCAST(struct UnlinkPidfileNotifier, notifier, n);
+ unlink(upn->pid_file_realpath);
+}
+
+static const QEMUOption *lookup_opt(int argc, char **argv,
+ const char **poptarg, int *poptind)
+{
+ const QEMUOption *popt;
+ int optind = *poptind;
+ char *r = argv[optind];
+ const char *optarg;
+
+ loc_set_cmdline(argv, optind, 1);
+ optind++;
+ /* Treat --foo the same as -foo. */
+ if (r[1] == '-') {
+ r++;
+ }
+ popt = qemu_options;
+ for (;;) {
+ if (!popt->name) {
+ error_report("invalid option");
+ exit(1);
+ }
+ if (!strcmp(popt->name, r + 1)) {
+ break;
+ }
+ popt++;
+ }
+ if (popt->flags & HAS_ARG) {
+ if (optind >= argc) {
+ error_report("requires an argument");
+ exit(1);
+ }
+ optarg = argv[optind++];
+ loc_set_cmdline(argv, optind - 2, 2);
+ } else {
+ optarg = NULL;
+ }
+
+ *poptarg = optarg;
+ *poptind = optind;
+
+ return popt;
+}
+
+static MachineClass *select_machine(QDict *qdict, Error **errp)
+{
+ const char *optarg = qdict_get_try_str(qdict, "type");
+ GSList *machines = object_class_get_list(TYPE_MACHINE, false);
+ MachineClass *machine_class;
+ Error *local_err = NULL;
+
+ if (optarg) {
+ machine_class = find_machine(optarg, machines);
+ qdict_del(qdict, "type");
+ if (!machine_class) {
+ error_setg(&local_err, "unsupported machine type");
+ }
+ } else {
+ machine_class = find_default_machine(machines);
+ if (!machine_class) {
+ error_setg(&local_err, "No machine specified, and there is no default");
+ }
+ }
+
+ g_slist_free(machines);
+ if (local_err) {
+ error_append_hint(&local_err, "Use -machine help to list supported machines\n");
+ error_propagate(errp, local_err);
+ }
+ return machine_class;
+}
+
+static int object_parse_property_opt(Object *obj,
+ const char *name, const char *value,
+ const char *skip, Error **errp)
+{
+ if (g_str_equal(name, skip)) {
+ return 0;
+ }
+
+ if (!object_property_parse(obj, name, value, errp)) {
+ return -1;
+ }
+
+ return 0;
+}
+
+/* *Non*recursively replace underscores with dashes in QDict keys. */
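+/* E.g. a "kernel_irqchip" key from the command line becomes "kernel-irqchip". */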
+static void keyval_dashify(QDict *qdict, Error **errp)
+{
+ const QDictEntry *ent, *next;
+ char *p;
+
+ for (ent = qdict_first(qdict); ent; ent = next) {
+ g_autofree char *new_key = NULL;
+
+ next = qdict_next(qdict, ent);
+ if (!strchr(ent->key, '_')) {
+ continue;
+ }
+ new_key = g_strdup(ent->key);
+ for (p = new_key; *p; p++) {
+ if (*p == '_') {
+ *p = '-';
+ }
+ }
+ if (qdict_haskey(qdict, new_key)) {
+ error_setg(errp, "Conflict between '%s' and '%s'", ent->key, new_key);
+ return;
+ }
+ qobject_ref(ent->value);
+ qdict_put_obj(qdict, new_key, ent->value);
+ qdict_del(qdict, ent->key);
+ }
+}
+
+static void qemu_apply_legacy_machine_options(QDict *qdict)
+{
+ const char *value;
+ QObject *prop;
+
+ keyval_dashify(qdict, &error_fatal);
+
+ /* Legacy options do not correspond to MachineState properties. */
+ value = qdict_get_try_str(qdict, "accel");
+ if (value) {
+ accelerators = g_strdup(value);
+ qdict_del(qdict, "accel");
+ }
+
+ value = qdict_get_try_str(qdict, "igd-passthru");
+ if (value) {
+ object_register_sugar_prop(ACCEL_CLASS_NAME("xen"), "igd-passthru", value,
+ false);
+ qdict_del(qdict, "igd-passthru");
+ }
+
+ value = qdict_get_try_str(qdict, "kvm-shadow-mem");
+ if (value) {
+ object_register_sugar_prop(ACCEL_CLASS_NAME("kvm"), "kvm-shadow-mem", value,
+ false);
+ qdict_del(qdict, "kvm-shadow-mem");
+ }
+
+ value = qdict_get_try_str(qdict, "kernel-irqchip");
+ if (value) {
+ object_register_sugar_prop(ACCEL_CLASS_NAME("kvm"), "kernel-irqchip", value,
+ false);
+ object_register_sugar_prop(ACCEL_CLASS_NAME("whpx"), "kernel-irqchip", value,
+ false);
+ qdict_del(qdict, "kernel-irqchip");
+ }
+
+ value = qdict_get_try_str(qdict, "memory-backend");
+ if (value) {
+ if (mem_path) {
+ error_report("'-mem-path' can't be used together with"
+ "'-machine memory-backend'");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Resolved later. */
+ ram_memdev_id = g_strdup(value);
+ qdict_del(qdict, "memory-backend");
+ }
+
+ prop = qdict_get(qdict, "memory");
+ if (prop) {
+ have_custom_ram_size =
+ qobject_type(prop) == QTYPE_QDICT &&
+ qdict_haskey(qobject_to(QDict, prop), "size");
+ }
+}
+
+static void object_option_foreach_add(bool (*type_opt_predicate)(const char *))
+{
+ ObjectOption *opt, *next;
+
+ QTAILQ_FOREACH_SAFE(opt, &object_opts, next, next) {
+ const char *type = ObjectType_str(opt->opts->qom_type);
+ if (type_opt_predicate(type)) {
+ user_creatable_add_qapi(opt->opts, &error_fatal);
+ qapi_free_ObjectOptions(opt->opts);
+ QTAILQ_REMOVE(&object_opts, opt, next);
+ g_free(opt);
+ }
+ }
+}
+
+static void object_option_add_visitor(Visitor *v)
+{
+ ObjectOption *opt = g_new0(ObjectOption, 1);
+ visit_type_ObjectOptions(v, NULL, &opt->opts, &error_fatal);
+ QTAILQ_INSERT_TAIL(&object_opts, opt, next);
+}
+
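+/*
+ * Parse one -object argument, which may be JSON (when it starts with
+ * '{') or a QemuOpts string, e.g. (illustrative):
+ * -object '{"qom-type": "rng-random", "id": "rng0"}'
+ * -object rng-random,id=rng0
+ */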
+static void object_option_parse(const char *optarg)
+{
+ QemuOpts *opts;
+ const char *type;
+ Visitor *v;
+
+ if (optarg[0] == '{') {
+ QObject *obj = qobject_from_json(optarg, &error_fatal);
+
+ v = qobject_input_visitor_new(obj);
+ qobject_unref(obj);
+ } else {
+ opts = qemu_opts_parse_noisily(qemu_find_opts("object"),
+ optarg, true);
+ if (!opts) {
+ exit(1);
+ }
+
+ type = qemu_opt_get(opts, "qom-type");
+ if (!type) {
+ error_setg(&error_fatal, QERR_MISSING_PARAMETER, "qom-type");
+ }
+ if (user_creatable_print_help(type, opts)) {
+ exit(0);
+ }
+
+ v = opts_visitor_new(opts);
+ }
+
+ object_option_add_visitor(v);
+ visit_free(v);
+}
+
+/*
+ * Very early object creation, before the sandbox options have been activated.
+ */
+static bool object_create_pre_sandbox(const char *type)
+{
+ /*
+ * Objects should in general not get initialized "too early" without
+ * a reason. If you add one, state the reason in a comment!
+ */
+
+ /*
+ * Reason: -sandbox on,resourcecontrol=deny disallows setting CPU
+ * affinity of threads.
+ */
+ if (g_str_equal(type, "thread-context")) {
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Initial object creation happens before all other
+ * QEMU data types are created. The majority of objects
+ * can be created at this point. The rng-egd object
+ * cannot be created here, as it depends on the chardev
+ * already existing.
+ */
+static bool object_create_early(const char *type)
+{
+ /*
+ * Objects should not be made "delayed" without a reason. If you
+ * add one, state the reason in a comment!
+ */
+
+ /* Reason: already created. */
+ if (object_create_pre_sandbox(type)) {
+ return false;
+ }
+
+ /* Reason: property "chardev" */
+ if (g_str_equal(type, "rng-egd") ||
+ g_str_equal(type, "qtest")) {
+ return false;
+ }
+
+#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX)
+ /* Reason: cryptodev-vhost-user property "chardev" */
+ if (g_str_equal(type, "cryptodev-vhost-user")) {
+ return false;
+ }
+#endif
+
+ /* Reason: vhost-user-blk-server property "node-name" */
+ if (g_str_equal(type, "vhost-user-blk-server")) {
+ return false;
+ }
+ /*
+ * Reason: filter-* property "netdev" etc.
+ */
+ if (g_str_equal(type, "filter-buffer") ||
+ g_str_equal(type, "filter-dump") ||
+ g_str_equal(type, "filter-mirror") ||
+ g_str_equal(type, "filter-redirector") ||
+ g_str_equal(type, "colo-compare") ||
+ g_str_equal(type, "filter-rewriter") ||
+ g_str_equal(type, "filter-replay")) {
+ return false;
+ }
+
+ /*
+ * Allocation of large amounts of memory may delay
+ * chardev initialization for too long, and trigger timeouts
+ * on software that waits for a monitor socket to be created
+ * (e.g. libvirt).
+ */
+ if (g_str_has_prefix(type, "memory-backend-")) {
+ return false;
+ }
+
+ return true;
+}
+
+static void qemu_apply_machine_options(QDict *qdict)
+{
+ object_set_properties_from_keyval(OBJECT(current_machine), qdict, false, &error_fatal);
+
+ if (semihosting_enabled(false) && !semihosting_get_argc()) {
+ /* fall back to the -kernel/-append */
+ semihosting_arg_fallback(current_machine->kernel_filename, current_machine->kernel_cmdline);
+ }
+
+ if (current_machine->smp.cpus > 1) {
+ replay_add_blocker("smp");
+ }
+}
+
+static void qemu_create_early_backends(void)
+{
+ MachineClass *machine_class = MACHINE_GET_CLASS(current_machine);
+#if defined(CONFIG_SDL)
+ const bool use_sdl = (dpy.type == DISPLAY_TYPE_SDL);
+#else
+ const bool use_sdl = false;
+#endif
+#if defined(CONFIG_GTK)
+ const bool use_gtk = (dpy.type == DISPLAY_TYPE_GTK);
+#else
+ const bool use_gtk = false;
+#endif
+
+ if (dpy.has_window_close && !use_gtk && !use_sdl) {
+ error_report("window-close is only valid for GTK and SDL, "
+ "ignoring option");
+ }
+
+ qemu_display_early_init(&dpy);
+ qemu_console_early_init();
+
+ if (dpy.has_gl && dpy.gl != DISPLAYGL_MODE_OFF && display_opengl == 0) {
+#if defined(CONFIG_OPENGL)
+ error_report("OpenGL is not supported by the display");
+#else
+ error_report("OpenGL support is disabled");
+#endif
+ exit(1);
+ }
+
+ object_option_foreach_add(object_create_early);
+
+ /* spice needs the timers to be initialized by this point */
+ /* spice must initialize before audio as it changes the default audiodev */
+ /* spice must initialize before chardevs (for spicevmc and spiceport) */
+ qemu_spice.init();
+
+ qemu_opts_foreach(qemu_find_opts("chardev"),
+ chardev_init_func, NULL, &error_fatal);
+
+#ifdef CONFIG_VIRTFS
+ qemu_opts_foreach(qemu_find_opts("fsdev"),
+ fsdev_init_func, NULL, &error_fatal);
+#endif
+
+ /*
+ * Note: we need to create audio and block backends before
+ * setting machine properties, so they can be referred to.
+ */
+ configure_blockdev(&bdo_queue, machine_class, snapshot);
+ audio_init_audiodevs();
+}
+
+
+/*
+ * The remainder of object creation happens after the
+ * creation of chardev, fsdev, net clients and device data types.
+ */
+static bool object_create_late(const char *type)
+{
+ return !object_create_early(type) && !object_create_pre_sandbox(type);
+}
+
+static void qemu_create_late_backends(void)
+{
+ if (qtest_chrdev) {
+ qtest_server_init(qtest_chrdev, qtest_log, &error_fatal);
+ }
+
+ net_init_clients();
+
+ object_option_foreach_add(object_create_late);
+
+ if (tpm_init() < 0) {
+ exit(1);
+ }
+
+ qemu_opts_foreach(qemu_find_opts("mon"),
+ mon_init_func, NULL, &error_fatal);
+
+ if (foreach_device_config(DEV_SERIAL, serial_parse) < 0) {
+ exit(1);
+ }
+ if (foreach_device_config(DEV_PARALLEL, parallel_parse) < 0) {
+ exit(1);
+ }
+ if (foreach_device_config(DEV_DEBUGCON, debugcon_parse) < 0) {
+ exit(1);
+ }
+
+ /* now chardevs have been created we may have semihosting to connect */
+ qemu_semihosting_chardev_init();
+}
+
+static void qemu_resolve_machine_memdev(void)
+{
+ if (ram_memdev_id) {
+ Object *backend;
+ ram_addr_t backend_size;
+
+ backend = object_resolve_path_type(ram_memdev_id,
+ TYPE_MEMORY_BACKEND, NULL);
+ if (!backend) {
+ error_report("Memory backend '%s' not found", ram_memdev_id);
+ exit(EXIT_FAILURE);
+ }
+ if (!have_custom_ram_size) {
+ backend_size = object_property_get_uint(backend, "size", &error_abort);
+ current_machine->ram_size = backend_size;
+ }
+ object_property_set_link(OBJECT(current_machine),
+ "memory-backend", backend, &error_fatal);
+ }
+}
+
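+/*
+ * Turn the -m options into "memory" machine properties. Note the
+ * legacy suffix-less form is interpreted as MiB, i.e. "-m 512" is
+ * equivalent to "-m 512M".
+ */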
+static void parse_memory_options(void)
+{
+ QemuOpts *opts = qemu_find_opts_singleton("memory");
+ QDict *dict, *prop;
+ const char *mem_str;
+ Location loc;
+
+ loc_push_none(&loc);
+ qemu_opts_loc_restore(opts);
+
+ prop = qdict_new();
+
+ if (qemu_opt_get_size(opts, "size", 0) != 0) {
+ /* Fix up legacy suffix-less format */
+ mem_str = qemu_opt_get(opts, "size");
+ if (g_ascii_isdigit(mem_str[strlen(mem_str) - 1])) {
+ g_autofree char *mib_str = g_strdup_printf("%sM", mem_str);
+ qdict_put_str(prop, "size", mib_str);
+ } else {
+ qdict_put_str(prop, "size", mem_str);
+ }
+ }
+
+ if (qemu_opt_get(opts, "maxmem")) {
+ qdict_put_str(prop, "max-size", qemu_opt_get(opts, "maxmem"));
+ }
+ if (qemu_opt_get(opts, "slots")) {
+ qdict_put_str(prop, "slots", qemu_opt_get(opts, "slots"));
+ }
+
+ dict = qdict_new();
+ qdict_put(dict, "memory", prop);
+ keyval_merge(machine_opts_dict, dict, &error_fatal);
+ qobject_unref(dict);
+ loc_pop(&loc);
+}
+
+static void qemu_create_machine(QDict *qdict)
+{
+ MachineClass *machine_class = select_machine(qdict, &error_fatal);
+ object_set_machine_compat_props(machine_class->compat_props);
+
+ current_machine = MACHINE(object_new_with_class(OBJECT_CLASS(machine_class)));
+ object_property_add_child(object_get_root(), "machine",
+ OBJECT(current_machine));
+ object_property_add_child(container_get(OBJECT(current_machine),
+ "/unattached"),
+ "sysbus", OBJECT(sysbus_get_default()));
+
+ if (machine_class->minimum_page_bits) {
+ if (!set_preferred_target_page_bits(machine_class->minimum_page_bits)) {
+ /* This would be a board error: specifying a minimum smaller than
+ * a target's compile-time fixed setting.
+ */
+ g_assert_not_reached();
+ }
+ }
+
+ cpu_exec_init_all();
+ page_size_init();
+
+ if (machine_class->hw_version) {
+ qemu_set_hw_version(machine_class->hw_version);
+ }
+
+ /*
+ * Get the default machine options from the machine if it is not already
+ * specified either by the configuration file or by the command line.
+ */
+ if (machine_class->default_machine_opts) {
+ QDict *default_opts =
+ keyval_parse(machine_class->default_machine_opts, NULL, NULL,
+ &error_abort);
+ qemu_apply_legacy_machine_options(default_opts);
+ object_set_properties_from_keyval(OBJECT(current_machine), default_opts,
+ false, &error_abort);
+ qobject_unref(default_opts);
+ }
+}
+
+static int global_init_func(void *opaque, QemuOpts *opts, Error **errp)
+{
+ GlobalProperty *g;
+
+ g = g_malloc0(sizeof(*g));
+ g->driver = qemu_opt_get(opts, "driver");
+ g->property = qemu_opt_get(opts, "property");
+ g->value = qemu_opt_get(opts, "value");
+ qdev_prop_register_global(g);
+ return 0;
+}
+
+/*
+ * Return whether configuration group @group is stored in QemuOpts, or
+ * recorded as one or more QDicts by qemu_record_config_group.
+ */
+static bool is_qemuopts_group(const char *group)
+{
+ if (g_str_equal(group, "object") ||
+ g_str_equal(group, "audiodev") ||
+ g_str_equal(group, "machine") ||
+ g_str_equal(group, "smp-opts") ||
+ g_str_equal(group, "boot-opts")) {
+ return false;
+ }
+ return true;
+}
+
+static void qemu_record_config_group(const char *group, QDict *dict,
+ bool from_json, Error **errp)
+{
+ if (g_str_equal(group, "object")) {
+ Visitor *v = qobject_input_visitor_new_keyval(QOBJECT(dict));
+ object_option_add_visitor(v);
+ visit_free(v);
+
+ } else if (g_str_equal(group, "audiodev")) {
+ Audiodev *dev = NULL;
+ Visitor *v = qobject_input_visitor_new_keyval(QOBJECT(dict));
+ if (visit_type_Audiodev(v, NULL, &dev, errp)) {
+ audio_define(dev);
+ }
+ visit_free(v);
+
+ } else if (g_str_equal(group, "machine")) {
+ /*
+ * Cannot merge string-valued and type-safe dictionaries, so JSON
+ * is not accepted yet for -M.
+ */
+ assert(!from_json);
+ keyval_merge(machine_opts_dict, dict, errp);
+ } else if (g_str_equal(group, "smp-opts")) {
+ machine_merge_property("smp", dict, &error_fatal);
+ } else if (g_str_equal(group, "boot-opts")) {
+ machine_merge_property("boot", dict, &error_fatal);
+ } else {
+ abort();
+ }
+}
+
+/*
+ * Parse non-QemuOpts config file groups, pass the rest to
+ * qemu_config_do_parse.
+ */
+static void qemu_parse_config_group(const char *group, QDict *qdict,
+ void *opaque, Error **errp)
+{
+ QObject *crumpled;
+ if (is_qemuopts_group(group)) {
+ qemu_config_do_parse(group, qdict, opaque, errp);
+ return;
+ }
+
+ crumpled = qdict_crumple(qdict, errp);
+ if (!crumpled) {
+ return;
+ }
+ switch (qobject_type(crumpled)) {
+ case QTYPE_QDICT:
+ qemu_record_config_group(group, qobject_to(QDict, crumpled), false, errp);
+ break;
+ case QTYPE_QLIST:
+ error_setg(errp, "Lists cannot be at top level of a configuration section");
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ qobject_unref(crumpled);
+}
+
+static void qemu_read_default_config_file(Error **errp)
+{
+ ERRP_GUARD();
+ int ret;
+ g_autofree char *file = get_relocated_path(CONFIG_QEMU_CONFDIR "/qemu.conf");
+
+ ret = qemu_read_config_file(file, qemu_parse_config_group, errp);
+ if (ret < 0) {
+ if (ret == -ENOENT) {
+ error_free(*errp);
+ *errp = NULL;
+ }
+ }
+}
+
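+/*
+ * Handle -set group.id.arg=value, e.g. (illustrative id)
+ * "-set drive.drive0.werror=report"; only QemuOpts-backed groups
+ * are supported, see is_qemuopts_group().
+ */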
+static void qemu_set_option(const char *str, Error **errp)
+{
+ char group[64], id[64], arg[64];
+ QemuOptsList *list;
+ QemuOpts *opts;
+ int rc, offset;
+
+ rc = sscanf(str, "%63[^.].%63[^.].%63[^=]%n", group, id, arg, &offset);
+ if (rc < 3 || str[offset] != '=') {
+ error_setg(errp, "can't parse: \"%s\"", str);
+ return;
+ }
+
+ if (!is_qemuopts_group(group)) {
+ error_setg(errp, "-set is not supported with %s", group);
+ } else {
+ list = qemu_find_opts_err(group, errp);
+ if (list) {
+ opts = qemu_opts_find(list, id);
+ if (!opts) {
+ error_setg(errp, "there is no %s \"%s\" defined", group, id);
+ return;
+ }
+ qemu_opt_set(opts, arg, str + offset + 1, errp);
+ }
+ }
+}
+
+static void user_register_global_props(void)
+{
+ qemu_opts_foreach(qemu_find_opts("global"),
+ global_init_func, NULL, NULL);
+}
+
+static int do_configure_icount(void *opaque, QemuOpts *opts, Error **errp)
+{
+ icount_configure(opts, errp);
+ return 0;
+}
+
+static int accelerator_set_property(void *opaque,
+ const char *name, const char *value,
+ Error **errp)
+{
+ return object_parse_property_opt(opaque, name, value, "accel", errp);
+}
+
+static int do_configure_accelerator(void *opaque, QemuOpts *opts, Error **errp)
+{
+ bool *p_init_failed = opaque;
+ const char *acc = qemu_opt_get(opts, "accel");
+ AccelClass *ac = accel_find(acc);
+ AccelState *accel;
+ int ret;
+ bool qtest_with_kvm;
+
+ if (!acc) {
+ error_setg(errp, QERR_MISSING_PARAMETER, "accel");
+ goto bad;
+ }
+
+ qtest_with_kvm = g_str_equal(acc, "kvm") && qtest_chrdev != NULL;
+
+ if (!ac) {
+ if (!qtest_with_kvm) {
+ error_report("invalid accelerator %s", acc);
+ }
+ goto bad;
+ }
+ accel = ACCEL(object_new_with_class(OBJECT_CLASS(ac)));
+ object_apply_compat_props(OBJECT(accel));
+ qemu_opt_foreach(opts, accelerator_set_property,
+ accel,
+ &error_fatal);
+ /*
+ * If legacy -singlestep option is set, honour it for TCG and
+ * silently ignore for any other accelerator (which is how this
+ * option has always behaved).
+ */
+ if (opt_one_insn_per_tb) {
+ /*
+ * This will always succeed for TCG, and we want to ignore
+ * the error from trying to set a nonexistent property
+ * on any other accelerator.
+ */
+ object_property_set_bool(OBJECT(accel), "one-insn-per-tb", true, NULL);
+ }
+ ret = accel_init_machine(accel, current_machine);
+ if (ret < 0) {
+ if (!qtest_with_kvm || ret != -ENOENT) {
+ error_report("failed to initialize %s: %s", acc, strerror(-ret));
+ }
+ goto bad;
+ }
+
+ return 1;
+
+bad:
+ *p_init_failed = true;
+ return 0;
+}
+
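+/*
+ * Apply the -accel/"-machine accel=" configuration. The legacy value
+ * is a colon-separated priority list, e.g. "kvm:tcg" tries KVM first
+ * and falls back to TCG.
+ */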
+static void configure_accelerators(const char *progname)
+{
+ bool init_failed = false;
+
+ qemu_opts_foreach(qemu_find_opts("icount"),
+ do_configure_icount, NULL, &error_fatal);
+
+ if (QTAILQ_EMPTY(&qemu_accel_opts.head)) {
+ char **accel_list, **tmp;
+
+ if (accelerators == NULL) {
+ /* Select the default accelerator */
+ bool have_tcg = accel_find("tcg");
+ bool have_kvm = accel_find("kvm");
+
+ if (have_tcg && have_kvm) {
+ if (g_str_has_suffix(progname, "kvm")) {
+ /* If the program name ends with "kvm", we prefer KVM */
+ accelerators = "kvm:tcg";
+ } else {
+ accelerators = "tcg:kvm";
+ }
+ } else if (have_kvm) {
+ accelerators = "kvm";
+ } else if (have_tcg) {
+ accelerators = "tcg";
+ } else {
+ error_report("No accelerator selected and"
+ " no default accelerator available");
+ exit(1);
+ }
+ }
+ accel_list = g_strsplit(accelerators, ":", 0);
+
+ for (tmp = accel_list; *tmp; tmp++) {
+ /*
+ * Filter invalid accelerators here, to prevent obscenities
+ * such as "-machine accel=tcg,,thread=single".
+ */
+ if (accel_find(*tmp)) {
+ qemu_opts_parse_noisily(qemu_find_opts("accel"), *tmp, true);
+ } else {
+ init_failed = true;
+ error_report("invalid accelerator %s", *tmp);
+ }
+ }
+ g_strfreev(accel_list);
+ } else {
+ if (accelerators != NULL) {
+ error_report("The -accel and \"-machine accel=\" options are incompatible");
+ exit(1);
+ }
+ }
+
+ if (!qemu_opts_foreach(qemu_find_opts("accel"),
+ do_configure_accelerator, &init_failed, &error_fatal)) {
+ if (!init_failed) {
+ error_report("no accelerator found");
+ }
+ exit(1);
+ }
+
+ if (init_failed && !qtest_chrdev) {
+ error_report("falling back to %s", current_accel_name());
+ }
+
+ if (icount_enabled() && !tcg_enabled()) {
+ error_report("-icount is not allowed with hardware virtualization");
+ exit(1);
+ }
+}
+
+static void qemu_validate_options(const QDict *machine_opts)
+{
+ const char *kernel_filename = qdict_get_try_str(machine_opts, "kernel");
+ const char *initrd_filename = qdict_get_try_str(machine_opts, "initrd");
+ const char *kernel_cmdline = qdict_get_try_str(machine_opts, "append");
+
+ if (kernel_filename == NULL) {
+ if (kernel_cmdline != NULL) {
+ error_report("-append only allowed with -kernel option");
+ exit(1);
+ }
+
+ if (initrd_filename != NULL) {
+ error_report("-initrd only allowed with -kernel option");
+ exit(1);
+ }
+ }
+
+ if (loadvm && preconfig_requested) {
+ error_report("'preconfig' and 'loadvm' options are "
+ "mutually exclusive");
+ exit(EXIT_FAILURE);
+ }
+ if (incoming && preconfig_requested && strcmp(incoming, "defer") != 0) {
+ error_report("'preconfig' supports '-incoming defer' only");
+ exit(EXIT_FAILURE);
+ }
+
+#ifdef CONFIG_CURSES
+ if (is_daemonized() && dpy.type == DISPLAY_TYPE_CURSES) {
+ error_report("curses display cannot be used with -daemonize");
+ exit(1);
+ }
+#endif
+}
+
+static void qemu_process_sugar_options(void)
+{
+ if (mem_prealloc) {
+ QObject *smp = qdict_get(machine_opts_dict, "smp");
+ if (smp && qobject_type(smp) == QTYPE_QDICT) {
+ QObject *cpus = qdict_get(qobject_to(QDict, smp), "cpus");
+ if (cpus && qobject_type(cpus) == QTYPE_QSTRING) {
+ const char *val = qstring_get_str(qobject_to(QString, cpus));
+ object_register_sugar_prop("memory-backend", "prealloc-threads",
+ val, false);
+ }
+ }
+ object_register_sugar_prop("memory-backend", "prealloc", "on", false);
+ }
+}
+
+/* -action processing */
+
+/*
+ * Process all the -action parameters parsed from cmdline.
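+ * Each one is applied via qmp_marshal_set_action(), so e.g.
+ * "-action reboot=shutdown" behaves like the -no-reboot shorthand
+ * handled later in qemu_init().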
+ */
+static int process_runstate_actions(void *opaque, QemuOpts *opts, Error **errp)
+{
+ Error *local_err = NULL;
+ QDict *qdict = qemu_opts_to_qdict(opts, NULL);
+ QObject *ret = NULL;
+ qmp_marshal_set_action(qdict, &ret, &local_err);
+ qobject_unref(ret);
+ qobject_unref(qdict);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return 1;
+ }
+ return 0;
+}
+
+static void qemu_process_early_options(void)
+{
+ qemu_opts_foreach(qemu_find_opts("name"),
+ parse_name, NULL, &error_fatal);
+
+ object_option_foreach_add(object_create_pre_sandbox);
+
+#ifdef CONFIG_SECCOMP
+ QemuOptsList *olist = qemu_find_opts_err("sandbox", NULL);
+ if (olist) {
+ qemu_opts_foreach(olist, parse_sandbox, NULL, &error_fatal);
+ }
+#endif
+
+ if (qemu_opts_foreach(qemu_find_opts("action"),
+ process_runstate_actions, NULL, &error_fatal)) {
+ exit(1);
+ }
+
+#ifndef _WIN32
+ qemu_opts_foreach(qemu_find_opts("add-fd"),
+ parse_add_fd, NULL, &error_fatal);
+
+ qemu_opts_foreach(qemu_find_opts("add-fd"),
+ cleanup_add_fd, NULL, &error_fatal);
+#endif
+
+ /* Open the logfile at this point and set the log mask if necessary. */
+ {
+ int mask = 0;
+ if (log_mask) {
+ mask = qemu_str_to_log_mask(log_mask);
+ if (!mask) {
+ qemu_print_log_usage(stdout);
+ exit(1);
+ }
+ }
+ qemu_set_log_filename_flags(log_file, mask, &error_fatal);
+ }
+
+ qemu_add_default_firmwarepath();
+}
+
+static void qemu_process_help_options(void)
+{
+ /*
+ * Check for -cpu help and -device help before we call select_machine(),
+ * which will return an error if the architecture has no default machine
+ * type and the user did not specify one, so that the user doesn't need
+ * to say '-cpu help -machine something'.
+ */
+ if (cpu_option && is_help_option(cpu_option)) {
+ list_cpus();
+ exit(0);
+ }
+
+ if (qemu_opts_foreach(qemu_find_opts("device"),
+ device_help_func, NULL, NULL)) {
+ exit(0);
+ }
+
+ /* -L help lists the data directories and exits. */
+ if (list_data_dirs) {
+ qemu_list_data_dirs();
+ exit(0);
+ }
+}
+
+static void qemu_maybe_daemonize(const char *pid_file)
+{
+ Error *err = NULL;
+
+ os_daemonize();
+ rcu_disable_atfork();
+
+ if (pid_file) {
+ char *pid_file_realpath = NULL;
+
+ if (!qemu_write_pidfile(pid_file, &err)) {
+ error_reportf_err(err, "cannot create PID file: ");
+ exit(1);
+ }
+
+ pid_file_realpath = g_malloc0(PATH_MAX);
+ if (!realpath(pid_file, pid_file_realpath)) {
+ if (errno != ENOENT) {
+ warn_report("not removing PID file on exit: cannot resolve PID "
+ "file path: %s: %s", pid_file, strerror(errno));
+ }
+ return;
+ }
+
+ qemu_unlink_pidfile_notifier = (struct UnlinkPidfileNotifier) {
+ .notifier = {
+ .notify = qemu_unlink_pidfile,
+ },
+ .pid_file_realpath = pid_file_realpath,
+ };
+ qemu_add_exit_notifier(&qemu_unlink_pidfile_notifier.notifier);
+ }
+}
+
+static void qemu_init_displays(void)
+{
+ DisplayState *ds;
+
+ /* init local displays */
+ ds = init_displaystate();
+ qemu_display_init(ds, &dpy);
+
+ /* must be after terminal init, SDL library changes signal handlers */
+ os_setup_signal_handling();
+
+ /* init remote displays */
+#ifdef CONFIG_VNC
+ qemu_opts_foreach(qemu_find_opts("vnc"),
+ vnc_init_func, NULL, &error_fatal);
+#endif
+
+ if (using_spice) {
+ qemu_spice.display_init();
+ }
+}
+
+static void qemu_init_board(void)
+{
+ /* process plugin before CPUs are created, but once -smp has been parsed */
+ qemu_plugin_load_list(&plugin_list, &error_fatal);
+
+ /* From here on we enter MACHINE_PHASE_INITIALIZED. */
+ machine_run_board_init(current_machine, mem_path, &error_fatal);
+
+ drive_check_orphaned();
+
+ realtime_init();
+}
+
+static void qemu_create_cli_devices(void)
+{
+ DeviceOption *opt;
+
+ soundhw_init();
+
+ qemu_opts_foreach(qemu_find_opts("fw_cfg"),
+ parse_fw_cfg, fw_cfg_find(), &error_fatal);
+
+ /* init USB devices */
+ if (machine_usb(current_machine)) {
+ if (foreach_device_config(DEV_USB, usb_parse) < 0) {
+ exit(1);
+ }
+ }
+
+ /* init generic devices */
+ rom_set_order_override(FW_CFG_ORDER_OVERRIDE_DEVICE);
+ qemu_opts_foreach(qemu_find_opts("device"),
+ device_init_func, NULL, &error_fatal);
+ QTAILQ_FOREACH(opt, &device_opts, next) {
+ DeviceState *dev;
+ loc_push_restore(&opt->loc);
+ /*
+ * TODO Eventually we should call qmp_device_add() here to make sure it
+ * behaves the same, but QMP still has to accept incorrectly typed
+ * options until libvirt is fixed and we want to be strict on the CLI
+ * from the start, so call qdev_device_add_from_qdict() directly for
+ * now.
+ */
+ dev = qdev_device_add_from_qdict(opt->opts, true, &error_fatal);
+ object_unref(OBJECT(dev));
+ loc_pop(&opt->loc);
+ }
+ rom_reset_order_override();
+}
+
+static void qemu_machine_creation_done(void)
+{
+ MachineState *machine = MACHINE(qdev_get_machine());
+
+ /* Did we create any drives that we failed to create a device for? */
+ drive_check_orphaned();
+
+ /* Don't warn about the default network setup that you get if
+ * no command line -net or -netdev options are specified. There
+ * are two cases that we would otherwise complain about:
+ * (1) board doesn't support a NIC but the implicit "-net nic"
+ * requested one
+ * (2) CONFIG_SLIRP not set, in which case the implicit "-net nic"
+ * sets up a nic that isn't connected to anything.
+ */
+ if (!default_net && (!qtest_enabled() || has_defaults)) {
+ net_check_clients();
+ }
+
+ qdev_prop_check_globals();
+
+ qdev_machine_creation_done();
+
+ if (machine->cgs) {
+ /*
+ * Verify that Confidential Guest Support has actually been initialized
+ */
+ assert(machine->cgs->ready);
+ }
+
+ if (foreach_device_config(DEV_GDB, gdbserver_start) < 0) {
+ exit(1);
+ }
+ if (!vga_interface_created && !default_vga &&
+ vga_interface_type != VGA_NONE) {
+ warn_report("A -vga option was passed but this machine "
+ "type does not use that option; "
+ "No VGA device has been created");
+ }
+}
+
+void qmp_x_exit_preconfig(Error **errp)
+{
+ if (phase_check(PHASE_MACHINE_INITIALIZED)) {
+ error_setg(errp, "The command is permitted only before machine initialization");
+ return;
+ }
+
+ qemu_init_board();
+ qemu_create_cli_devices();
+ qemu_machine_creation_done();
+
+ if (loadvm) {
+ load_snapshot(loadvm, NULL, false, NULL, &error_fatal);
+ }
+ if (replay_mode != REPLAY_MODE_NONE) {
+ replay_vmstate_init();
+ }
+
+ if (incoming) {
+ Error *local_err = NULL;
+ if (strcmp(incoming, "defer") != 0) {
+ qmp_migrate_incoming(incoming, &local_err);
+ if (local_err) {
+ error_reportf_err(local_err, "-incoming %s: ", incoming);
+ exit(1);
+ }
+ }
+ } else if (autostart) {
+ qmp_cont(NULL);
+ }
+}
+
+void qemu_init(int argc, char **argv)
+{
+ QemuOpts *opts;
+ QemuOpts *icount_opts = NULL, *accel_opts = NULL;
+ QemuOptsList *olist;
+ int optind;
+ const char *optarg;
+ MachineClass *machine_class;
+ bool userconfig = true;
+ FILE *vmstate_dump_file = NULL;
+
+ qemu_add_opts(&qemu_drive_opts);
+ qemu_add_drive_opts(&qemu_legacy_drive_opts);
+ qemu_add_drive_opts(&qemu_common_drive_opts);
+ qemu_add_drive_opts(&qemu_drive_opts);
+ qemu_add_drive_opts(&bdrv_runtime_opts);
+ qemu_add_opts(&qemu_chardev_opts);
+ qemu_add_opts(&qemu_device_opts);
+ qemu_add_opts(&qemu_netdev_opts);
+ qemu_add_opts(&qemu_nic_opts);
+ qemu_add_opts(&qemu_net_opts);
+ qemu_add_opts(&qemu_rtc_opts);
+ qemu_add_opts(&qemu_global_opts);
+ qemu_add_opts(&qemu_mon_opts);
+ qemu_add_opts(&qemu_trace_opts);
+ qemu_plugin_add_opts();
+ qemu_add_opts(&qemu_option_rom_opts);
+ qemu_add_opts(&qemu_accel_opts);
+ qemu_add_opts(&qemu_mem_opts);
+ qemu_add_opts(&qemu_smp_opts);
+ qemu_add_opts(&qemu_boot_opts);
+ qemu_add_opts(&qemu_add_fd_opts);
+ qemu_add_opts(&qemu_object_opts);
+ qemu_add_opts(&qemu_tpmdev_opts);
+ qemu_add_opts(&qemu_overcommit_opts);
+ qemu_add_opts(&qemu_msg_opts);
+ qemu_add_opts(&qemu_name_opts);
+ qemu_add_opts(&qemu_numa_opts);
+ qemu_add_opts(&qemu_icount_opts);
+ qemu_add_opts(&qemu_semihosting_config_opts);
+ qemu_add_opts(&qemu_fw_cfg_opts);
+ qemu_add_opts(&qemu_action_opts);
+ qemu_add_run_with_opts();
+ module_call_init(MODULE_INIT_OPTS);
+
+ error_init(argv[0]);
+ qemu_init_exec_dir(argv[0]);
+
+ qemu_init_arch_modules();
+
+ qemu_init_subsystems();
+
+ /* first pass of option parsing */
+ optind = 1;
+ while (optind < argc) {
+ if (argv[optind][0] != '-') {
+ /* disk image */
+ optind++;
+ } else {
+ const QEMUOption *popt;
+
+ popt = lookup_opt(argc, argv, &optarg, &optind);
+ switch (popt->index) {
+ case QEMU_OPTION_nouserconfig:
+ userconfig = false;
+ break;
+ }
+ }
+ }
+
+ machine_opts_dict = qdict_new();
+ if (userconfig) {
+ qemu_read_default_config_file(&error_fatal);
+ }
+
+ /* second pass of option parsing */
+ optind = 1;
+ for (;;) {
+ if (optind >= argc) {
+ break;
+ }
+ if (argv[optind][0] != '-') {
+ loc_set_cmdline(argv, optind, 1);
+ drive_add(IF_DEFAULT, 0, argv[optind++], HD_OPTS);
+ } else {
+ const QEMUOption *popt;
+
+ popt = lookup_opt(argc, argv, &optarg, &optind);
+ if (!(popt->arch_mask & arch_type)) {
+ error_report("Option not supported for this target");
+ exit(1);
+ }
+ switch (popt->index) {
+ case QEMU_OPTION_cpu:
+ /* hw initialization will check this */
+ cpu_option = optarg;
+ break;
+ case QEMU_OPTION_hda:
+ case QEMU_OPTION_hdb:
+ case QEMU_OPTION_hdc:
+ case QEMU_OPTION_hdd:
+ drive_add(IF_DEFAULT, popt->index - QEMU_OPTION_hda, optarg,
+ HD_OPTS);
+ break;
+ case QEMU_OPTION_blockdev:
+ {
+ Visitor *v;
+ BlockdevOptionsQueueEntry *bdo;
+
+ v = qobject_input_visitor_new_str(optarg, "driver",
+ &error_fatal);
+
+ bdo = g_new(BlockdevOptionsQueueEntry, 1);
+ visit_type_BlockdevOptions(v, NULL, &bdo->bdo,
+ &error_fatal);
+ visit_free(v);
+ loc_save(&bdo->loc);
+ QSIMPLEQ_INSERT_TAIL(&bdo_queue, bdo, entry);
+ break;
+ }
+ case QEMU_OPTION_drive:
+ opts = qemu_opts_parse_noisily(qemu_find_opts("drive"),
+ optarg, false);
+ if (opts == NULL) {
+ exit(1);
+ }
+ break;
+ case QEMU_OPTION_set:
+ qemu_set_option(optarg, &error_fatal);
+ break;
+ case QEMU_OPTION_global:
+ if (qemu_global_option(optarg) != 0) {
+ exit(1);
+ }
+ break;
+ case QEMU_OPTION_mtdblock:
+ drive_add(IF_MTD, -1, optarg, MTD_OPTS);
+ break;
+ case QEMU_OPTION_sd:
+ drive_add(IF_SD, -1, optarg, SD_OPTS);
+ break;
+ case QEMU_OPTION_pflash:
+ drive_add(IF_PFLASH, -1, optarg, PFLASH_OPTS);
+ break;
+ case QEMU_OPTION_snapshot:
+ snapshot = 1;
+ replay_add_blocker("-snapshot");
+ break;
+ case QEMU_OPTION_numa:
+ opts = qemu_opts_parse_noisily(qemu_find_opts("numa"),
+ optarg, true);
+ if (!opts) {
+ exit(1);
+ }
+ break;
+ case QEMU_OPTION_display:
+ parse_display(optarg);
+ break;
+ case QEMU_OPTION_nographic:
+ qdict_put_str(machine_opts_dict, "graphics", "off");
+ nographic = true;
+ dpy.type = DISPLAY_TYPE_NONE;
+ break;
+ case QEMU_OPTION_portrait:
+ graphic_rotate = 90;
+ break;
+ case QEMU_OPTION_rotate:
+ graphic_rotate = strtol(optarg, (char **) &optarg, 10);
+ if (graphic_rotate != 0 && graphic_rotate != 90 &&
+ graphic_rotate != 180 && graphic_rotate != 270) {
+ error_report("only 90, 180, 270 deg rotation is available");
+ exit(1);
+ }
+ break;
+ case QEMU_OPTION_kernel:
+ qdict_put_str(machine_opts_dict, "kernel", optarg);
+ break;
+ case QEMU_OPTION_initrd:
+ qdict_put_str(machine_opts_dict, "initrd", optarg);
+ break;
+ case QEMU_OPTION_append:
+ qdict_put_str(machine_opts_dict, "append", optarg);
+ break;
+ case QEMU_OPTION_dtb:
+ qdict_put_str(machine_opts_dict, "dtb", optarg);
+ break;
+ case QEMU_OPTION_cdrom:
+ drive_add(IF_DEFAULT, 2, optarg, CDROM_OPTS);
+ break;
+ case QEMU_OPTION_boot:
+ machine_parse_property_opt(qemu_find_opts("boot-opts"), "boot", optarg);
+ break;
+ case QEMU_OPTION_fda:
+ case QEMU_OPTION_fdb:
+ drive_add(IF_FLOPPY, popt->index - QEMU_OPTION_fda,
+ optarg, FD_OPTS);
+ break;
+ case QEMU_OPTION_no_fd_bootchk:
+ fd_bootchk = 0;
+ break;
+ case QEMU_OPTION_netdev:
+ default_net = 0;
+ if (netdev_is_modern(optarg)) {
+ netdev_parse_modern(optarg);
+ } else {
+ net_client_parse(qemu_find_opts("netdev"), optarg);
+ }
+ break;
+ case QEMU_OPTION_nic:
+ default_net = 0;
+ net_client_parse(qemu_find_opts("nic"), optarg);
+ break;
+ case QEMU_OPTION_net:
+ default_net = 0;
+ net_client_parse(qemu_find_opts("net"), optarg);
+ break;
+#ifdef CONFIG_LIBISCSI
+ case QEMU_OPTION_iscsi:
+ opts = qemu_opts_parse_noisily(qemu_find_opts("iscsi"),
+ optarg, false);
+ if (!opts) {
+ exit(1);
+ }
+ break;
+#endif
+ case QEMU_OPTION_audiodev:
+ audio_parse_option(optarg);
+ break;
+ case QEMU_OPTION_audio: {
+ bool help;
+ char *model;
+ Audiodev *dev = NULL;
+ Visitor *v;
+ QDict *dict = keyval_parse(optarg, "driver", &help, &error_fatal);
+ if (help || (qdict_haskey(dict, "driver") &&
+ is_help_option(qdict_get_str(dict, "driver")))) {
+ audio_help();
+ exit(EXIT_SUCCESS);
+ }
+ if (!qdict_haskey(dict, "id")) {
+ qdict_put_str(dict, "id", "audiodev0");
+ }
+ if (!qdict_haskey(dict, "model")) {
+ error_setg(&error_fatal, "Parameter 'model' is missing");
+ }
+ model = g_strdup(qdict_get_str(dict, "model"));
+ qdict_del(dict, "model");
+ if (is_help_option(model)) {
+ show_valid_soundhw();
+ exit(0);
+ }
+ v = qobject_input_visitor_new_keyval(QOBJECT(dict));
+ qobject_unref(dict);
+ visit_type_Audiodev(v, NULL, &dev, &error_fatal);
+ visit_free(v);
+ audio_define(dev);
+ select_soundhw(model, dev->id);
+ g_free(model);
+ break;
+ }
+ case QEMU_OPTION_h:
+ help(0);
+ break;
+ case QEMU_OPTION_version:
+ version();
+ exit(0);
+ break;
+ case QEMU_OPTION_m:
+ opts = qemu_opts_parse_noisily(qemu_find_opts("memory"), optarg, true);
+ if (opts == NULL) {
+ exit(1);
+ }
+ break;
+#ifdef CONFIG_TPM
+ case QEMU_OPTION_tpmdev:
+ if (tpm_config_parse(qemu_find_opts("tpmdev"), optarg) < 0) {
+ exit(1);
+ }
+ break;
+#endif
+ case QEMU_OPTION_mempath:
+ mem_path = optarg;
+ break;
+ case QEMU_OPTION_mem_prealloc:
+ mem_prealloc = 1;
+ break;
+ case QEMU_OPTION_d:
+ log_mask = optarg;
+ break;
+ case QEMU_OPTION_D:
+ log_file = optarg;
+ break;
+ case QEMU_OPTION_DFILTER:
+ qemu_set_dfilter_ranges(optarg, &error_fatal);
+ break;
+#if defined(CONFIG_TCG) && defined(CONFIG_LINUX)
+ case QEMU_OPTION_perfmap:
+ perf_enable_perfmap();
+ break;
+ case QEMU_OPTION_jitdump:
+ perf_enable_jitdump();
+ break;
+#endif
+ case QEMU_OPTION_seed:
+ qemu_guest_random_seed_main(optarg, &error_fatal);
+ break;
+ case QEMU_OPTION_s:
+ add_device_config(DEV_GDB, "tcp::" DEFAULT_GDBSTUB_PORT);
+ break;
+ case QEMU_OPTION_gdb:
+ add_device_config(DEV_GDB, optarg);
+ break;
+ case QEMU_OPTION_L:
+ if (is_help_option(optarg)) {
+ list_data_dirs = true;
+ } else {
+ qemu_add_data_dir(g_strdup(optarg));
+ }
+ break;
+ case QEMU_OPTION_bios:
+ qdict_put_str(machine_opts_dict, "firmware", optarg);
+ break;
+ case QEMU_OPTION_singlestep:
+ opt_one_insn_per_tb = true;
+ break;
+ case QEMU_OPTION_S:
+ autostart = 0;
+ break;
+ case QEMU_OPTION_k:
+ keyboard_layout = optarg;
+ break;
+ case QEMU_OPTION_vga:
+ vga_model = optarg;
+ default_vga = 0;
+ break;
+ case QEMU_OPTION_g:
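+ /* -g takes a "WxH[xDEPTH]" spec, e.g. "-g 1024x768x24". */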
+ {
+ const char *p;
+ int w, h, depth;
+ p = optarg;
+ w = strtol(p, (char **)&p, 10);
+ if (w <= 0) {
+ graphic_error:
+ error_report("invalid resolution or depth");
+ exit(1);
+ }
+ if (*p != 'x') {
+ goto graphic_error;
+ }
+ p++;
+ h = strtol(p, (char **)&p, 10);
+ if (h <= 0) {
+ goto graphic_error;
+ }
+ if (*p == 'x') {
+ p++;
+ depth = strtol(p, (char **)&p, 10);
+ if (depth != 1 && depth != 2 && depth != 4 &&
+ depth != 8 && depth != 15 && depth != 16 &&
+ depth != 24 && depth != 32) {
+ goto graphic_error;
+ }
+ } else if (*p == '\0') {
+ depth = graphic_depth;
+ } else {
+ goto graphic_error;
+ }
+
+ graphic_width = w;
+ graphic_height = h;
+ graphic_depth = depth;
+ }
+ break;
+ case QEMU_OPTION_echr:
+ {
+ char *r;
+ term_escape_char = strtol(optarg, &r, 0);
+ if (r == optarg) {
+ printf("Bad argument to echr\n");
+ }
+ break;
+ }
+ case QEMU_OPTION_monitor:
+ default_monitor = 0;
+ if (strncmp(optarg, "none", 4)) {
+ monitor_parse(optarg, "readline", false);
+ }
+ break;
+ case QEMU_OPTION_qmp:
+ monitor_parse(optarg, "control", false);
+ default_monitor = 0;
+ break;
+ case QEMU_OPTION_qmp_pretty:
+ monitor_parse(optarg, "control", true);
+ default_monitor = 0;
+ break;
+ case QEMU_OPTION_mon:
+ opts = qemu_opts_parse_noisily(qemu_find_opts("mon"), optarg,
+ true);
+ if (!opts) {
+ exit(1);
+ }
+ default_monitor = 0;
+ break;
+ case QEMU_OPTION_chardev:
+ opts = qemu_opts_parse_noisily(qemu_find_opts("chardev"),
+ optarg, true);
+ if (!opts) {
+ exit(1);
+ }
+ break;
+ case QEMU_OPTION_fsdev:
+ olist = qemu_find_opts("fsdev");
+ if (!olist) {
+ error_report("fsdev support is disabled");
+ exit(1);
+ }
+ opts = qemu_opts_parse_noisily(olist, optarg, true);
+ if (!opts) {
+ exit(1);
+ }
+ break;
+ case QEMU_OPTION_virtfs: {
+ QemuOpts *fsdev;
+ QemuOpts *device;
+ const char *writeout, *sock_fd, *socket, *path, *security_model,
+ *multidevs;
+
+ olist = qemu_find_opts("virtfs");
+ if (!olist) {
+ error_report("virtfs support is disabled");
+ exit(1);
+ }
+ opts = qemu_opts_parse_noisily(olist, optarg, true);
+ if (!opts) {
+ exit(1);
+ }
+
+ if (qemu_opt_get(opts, "fsdriver") == NULL ||
+ qemu_opt_get(opts, "mount_tag") == NULL) {
+ error_report("Usage: -virtfs fsdriver,mount_tag=tag");
+ exit(1);
+ }
+ fsdev = qemu_opts_create(qemu_find_opts("fsdev"),
+ qemu_opts_id(opts) ?:
+ qemu_opt_get(opts, "mount_tag"),
+ 1, NULL);
+ if (!fsdev) {
+ error_report("duplicate or invalid fsdev id: %s",
+ qemu_opt_get(opts, "mount_tag"));
+ exit(1);
+ }
+
+ writeout = qemu_opt_get(opts, "writeout");
+ if (writeout) {
+#ifdef CONFIG_SYNC_FILE_RANGE
+ qemu_opt_set(fsdev, "writeout", writeout, &error_abort);
+#else
+ error_report("writeout=immediate not supported "
+ "on this platform");
+ exit(1);
+#endif
+ }
+ qemu_opt_set(fsdev, "fsdriver",
+ qemu_opt_get(opts, "fsdriver"), &error_abort);
+ path = qemu_opt_get(opts, "path");
+ if (path) {
+ qemu_opt_set(fsdev, "path", path, &error_abort);
+ }
+ security_model = qemu_opt_get(opts, "security_model");
+ if (security_model) {
+ qemu_opt_set(fsdev, "security_model", security_model,
+ &error_abort);
+ }
+ socket = qemu_opt_get(opts, "socket");
+ if (socket) {
+ qemu_opt_set(fsdev, "socket", socket, &error_abort);
+ }
+ sock_fd = qemu_opt_get(opts, "sock_fd");
+ if (sock_fd) {
+ qemu_opt_set(fsdev, "sock_fd", sock_fd, &error_abort);
+ }
+
+ qemu_opt_set_bool(fsdev, "readonly",
+ qemu_opt_get_bool(opts, "readonly", 0),
+ &error_abort);
+ multidevs = qemu_opt_get(opts, "multidevs");
+ if (multidevs) {
+ qemu_opt_set(fsdev, "multidevs", multidevs, &error_abort);
+ }
+ device = qemu_opts_create(qemu_find_opts("device"), NULL, 0,
+ &error_abort);
+ qemu_opt_set(device, "driver", "virtio-9p-pci", &error_abort);
+ qemu_opt_set(device, "fsdev",
+ qemu_opts_id(fsdev), &error_abort);
+ qemu_opt_set(device, "mount_tag",
+ qemu_opt_get(opts, "mount_tag"), &error_abort);
+ break;
+ }
+ case QEMU_OPTION_serial:
+ add_device_config(DEV_SERIAL, optarg);
+ default_serial = 0;
+ if (strncmp(optarg, "mon:", 4) == 0) {
+ default_monitor = 0;
+ }
+ break;
+ case QEMU_OPTION_action:
+ olist = qemu_find_opts("action");
+ if (!qemu_opts_parse_noisily(olist, optarg, false)) {
+ exit(1);
+ }
+ break;
+ case QEMU_OPTION_watchdog_action: {
+ opts = qemu_opts_create(qemu_find_opts("action"), NULL, 0, &error_abort);
+ qemu_opt_set(opts, "watchdog", optarg, &error_abort);
+ break;
+ }
+ case QEMU_OPTION_parallel:
+ add_device_config(DEV_PARALLEL, optarg);
+ default_parallel = 0;
+ if (strncmp(optarg, "mon:", 4) == 0) {
+ default_monitor = 0;
+ }
+ break;
+ case QEMU_OPTION_debugcon:
+ add_device_config(DEV_DEBUGCON, optarg);
+ break;
+ case QEMU_OPTION_loadvm:
+ loadvm = optarg;
+ break;
+ case QEMU_OPTION_full_screen:
+ dpy.has_full_screen = true;
+ dpy.full_screen = true;
+ break;
+ case QEMU_OPTION_pidfile:
+ pid_file = optarg;
+ break;
+ case QEMU_OPTION_win2k_hack:
+ win2k_install_hack = 1;
+ break;
+ case QEMU_OPTION_acpitable:
+ opts = qemu_opts_parse_noisily(qemu_find_opts("acpi"),
+ optarg, true);
+ if (!opts) {
+ exit(1);
+ }
+ acpi_table_add(opts, &error_fatal);
+ break;
+ case QEMU_OPTION_smbios:
+ opts = qemu_opts_parse_noisily(qemu_find_opts("smbios"),
+ optarg, false);
+ if (!opts) {
+ exit(1);
+ }
+ smbios_entry_add(opts, &error_fatal);
+ break;
+ case QEMU_OPTION_fwcfg:
+ opts = qemu_opts_parse_noisily(qemu_find_opts("fw_cfg"),
+ optarg, true);
+ if (opts == NULL) {
+ exit(1);
+ }
+ break;
+ case QEMU_OPTION_preconfig:
+ preconfig_requested = true;
+ break;
+ case QEMU_OPTION_enable_kvm:
+ qdict_put_str(machine_opts_dict, "accel", "kvm");
+ break;
+ case QEMU_OPTION_M:
+ case QEMU_OPTION_machine:
+ {
+ bool help;
+
+ keyval_parse_into(machine_opts_dict, optarg, "type", &help, &error_fatal);
+ if (help) {
+ machine_help_func(machine_opts_dict);
+ exit(EXIT_SUCCESS);
+ }
+ break;
+ }
+ case QEMU_OPTION_accel:
+ accel_opts = qemu_opts_parse_noisily(qemu_find_opts("accel"),
+ optarg, true);
+ optarg = qemu_opt_get(accel_opts, "accel");
+ if (!optarg || is_help_option(optarg)) {
+ printf("Accelerators supported in QEMU binary:\n");
+ GSList *el, *accel_list = object_class_get_list(TYPE_ACCEL,
+ false);
+ for (el = accel_list; el; el = el->next) {
+ gchar *typename = g_strdup(object_class_get_name(
+ OBJECT_CLASS(el->data)));
+ /* omit qtest which is used for tests only */
+ if (g_strcmp0(typename, ACCEL_CLASS_NAME("qtest")) &&
+ g_str_has_suffix(typename, ACCEL_CLASS_SUFFIX)) {
+ gchar **optname = g_strsplit(typename,
+ ACCEL_CLASS_SUFFIX, 0);
+ printf("%s\n", optname[0]);
+ g_strfreev(optname);
+ }
+ g_free(typename);
+ }
+ g_slist_free(accel_list);
+ exit(0);
+ }
+ break;
+ case QEMU_OPTION_usb:
+ qdict_put_str(machine_opts_dict, "usb", "on");
+ break;
+ case QEMU_OPTION_usbdevice:
+ qdict_put_str(machine_opts_dict, "usb", "on");
+ add_device_config(DEV_USB, optarg);
+ break;
+ case QEMU_OPTION_device:
+ if (optarg[0] == '{') {
+ QObject *obj = qobject_from_json(optarg, &error_fatal);
+ DeviceOption *opt = g_new0(DeviceOption, 1);
+ opt->opts = qobject_to(QDict, obj);
+ loc_save(&opt->loc);
+ assert(opt->opts != NULL);
+ QTAILQ_INSERT_TAIL(&device_opts, opt, next);
+ } else {
+ if (!qemu_opts_parse_noisily(qemu_find_opts("device"),
+ optarg, true)) {
+ exit(1);
+ }
+ }
+ break;
+ case QEMU_OPTION_smp:
+ machine_parse_property_opt(qemu_find_opts("smp-opts"),
+ "smp", optarg);
+ break;
+ case QEMU_OPTION_vnc:
+ vnc_parse(optarg);
+ break;
+ case QEMU_OPTION_no_acpi:
+ warn_report("-no-acpi is deprecated, use '-machine acpi=off' instead");
+ qdict_put_str(machine_opts_dict, "acpi", "off");
+ break;
+ case QEMU_OPTION_no_hpet:
+ warn_report("-no-hpet is deprecated, use '-machine hpet=off' instead");
+ qdict_put_str(machine_opts_dict, "hpet", "off");
+ break;
+ case QEMU_OPTION_no_reboot:
+ olist = qemu_find_opts("action");
+ qemu_opts_parse_noisily(olist, "reboot=shutdown", false);
+ break;
+ case QEMU_OPTION_no_shutdown:
+ olist = qemu_find_opts("action");
+ qemu_opts_parse_noisily(olist, "shutdown=pause", false);
+ break;
+ case QEMU_OPTION_uuid:
+ if (qemu_uuid_parse(optarg, &qemu_uuid) < 0) {
+ error_report("failed to parse UUID string: wrong format");
+ exit(1);
+ }
+ qemu_uuid_set = true;
+ break;
+ case QEMU_OPTION_option_rom:
+ if (nb_option_roms >= MAX_OPTION_ROMS) {
+ error_report("too many option ROMs");
+ exit(1);
+ }
+ opts = qemu_opts_parse_noisily(qemu_find_opts("option-rom"),
+ optarg, true);
+ if (!opts) {
+ exit(1);
+ }
+ option_rom[nb_option_roms].name = qemu_opt_get(opts, "romfile");
+ option_rom[nb_option_roms].bootindex =
+ qemu_opt_get_number(opts, "bootindex", -1);
+ if (!option_rom[nb_option_roms].name) {
+ error_report("Option ROM file is not specified");
+ exit(1);
+ }
+ nb_option_roms++;
+ break;
+ case QEMU_OPTION_semihosting:
+ qemu_semihosting_enable();
+ break;
+ case QEMU_OPTION_semihosting_config:
+ if (qemu_semihosting_config_options(optarg) != 0) {
+ exit(1);
+ }
+ break;
+ case QEMU_OPTION_name:
+ opts = qemu_opts_parse_noisily(qemu_find_opts("name"),
+ optarg, true);
+ if (!opts) {
+ exit(1);
+ }
+ /* Capture guest name if -msg guest-name is used later */
+ error_guest_name = qemu_opt_get(opts, "guest");
+ break;
+ case QEMU_OPTION_prom_env:
+ if (nb_prom_envs >= MAX_PROM_ENVS) {
+ error_report("too many prom variables");
+ exit(1);
+ }
+ prom_envs[nb_prom_envs] = optarg;
+ nb_prom_envs++;
+ break;
+ case QEMU_OPTION_old_param:
+ old_param = 1;
+ break;
+ case QEMU_OPTION_rtc:
+ opts = qemu_opts_parse_noisily(qemu_find_opts("rtc"), optarg,
+ false);
+ if (!opts) {
+ exit(1);
+ }
+ break;
+ case QEMU_OPTION_icount:
+ icount_opts = qemu_opts_parse_noisily(qemu_find_opts("icount"),
+ optarg, true);
+ if (!icount_opts) {
+ exit(1);
+ }
+ break;
+ case QEMU_OPTION_incoming:
+ if (!incoming) {
+ runstate_set(RUN_STATE_INMIGRATE);
+ }
+ incoming = optarg;
+ break;
+ case QEMU_OPTION_only_migratable:
+ only_migratable = 1;
+ break;
+ case QEMU_OPTION_nodefaults:
+ has_defaults = 0;
+ break;
+ case QEMU_OPTION_xen_domid:
+ if (!(accel_find("xen")) && !(accel_find("kvm"))) {
+ error_report("Option not supported for this target");
+ exit(1);
+ }
+ xen_domid = atoi(optarg);
+ break;
+ case QEMU_OPTION_xen_attach:
+ if (!(accel_find("xen"))) {
+ error_report("Option not supported for this target");
+ exit(1);
+ }
+ xen_mode = XEN_ATTACH;
+ break;
+ case QEMU_OPTION_xen_domid_restrict:
+ if (!(accel_find("xen"))) {
+ error_report("Option not supported for this target");
+ exit(1);
+ }
+ xen_domid_restrict = true;
+ break;
+ case QEMU_OPTION_trace:
+ trace_opt_parse(optarg);
+ break;
+ case QEMU_OPTION_plugin:
+ qemu_plugin_opt_parse(optarg, &plugin_list);
+ break;
+ case QEMU_OPTION_readconfig:
+ qemu_read_config_file(optarg, qemu_parse_config_group, &error_fatal);
+ break;
+#ifdef CONFIG_SPICE
+ case QEMU_OPTION_spice:
+ olist = qemu_find_opts_err("spice", NULL);
+ if (!olist) {
+ error_report("spice support is disabled");
+ exit(1);
+ }
+ opts = qemu_opts_parse_noisily(olist, optarg, false);
+ if (!opts) {
+ exit(1);
+ }
+ display_remote++;
+ break;
+#endif
+ case QEMU_OPTION_qtest:
+ qtest_chrdev = optarg;
+ break;
+ case QEMU_OPTION_qtest_log:
+ qtest_log = optarg;
+ break;
+ case QEMU_OPTION_sandbox:
+ olist = qemu_find_opts("sandbox");
+ if (!olist) {
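+                /*
+                 * The "sandbox" opts group is only registered when
+                 * seccomp support is compiled in.
+                 */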
+#ifndef CONFIG_SECCOMP
+ error_report("-sandbox support is not enabled "
+ "in this QEMU binary");
+#endif
+ exit(1);
+ }
+
+ opts = qemu_opts_parse_noisily(olist, optarg, true);
+ if (!opts) {
+ exit(1);
+ }
+ break;
+ case QEMU_OPTION_add_fd:
+#ifndef _WIN32
+ opts = qemu_opts_parse_noisily(qemu_find_opts("add-fd"),
+ optarg, false);
+ if (!opts) {
+ exit(1);
+ }
+#else
+ error_report("File descriptor passing is disabled on this "
+ "platform");
+ exit(1);
+#endif
+ break;
+ case QEMU_OPTION_object:
+ object_option_parse(optarg);
+ break;
+ case QEMU_OPTION_overcommit:
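+                /* e.g. -overcommit mem-lock=on,cpu-pm=on */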
+ opts = qemu_opts_parse_noisily(qemu_find_opts("overcommit"),
+ optarg, false);
+ if (!opts) {
+ exit(1);
+ }
+ enable_mlock = qemu_opt_get_bool(opts, "mem-lock", false);
+ enable_cpu_pm = qemu_opt_get_bool(opts, "cpu-pm", false);
+ break;
+ case QEMU_OPTION_compat:
+ {
+ CompatPolicy *opts_policy;
+ Visitor *v;
+
+ v = qobject_input_visitor_new_str(optarg, NULL,
+ &error_fatal);
+
+ visit_type_CompatPolicy(v, NULL, &opts_policy, &error_fatal);
+ QAPI_CLONE_MEMBERS(CompatPolicy, &compat_policy, opts_policy);
+
+ qapi_free_CompatPolicy(opts_policy);
+ visit_free(v);
+ break;
+ }
+ case QEMU_OPTION_msg:
+ opts = qemu_opts_parse_noisily(qemu_find_opts("msg"), optarg,
+ false);
+ if (!opts) {
+ exit(1);
+ }
+ configure_msg(opts);
+ break;
+ case QEMU_OPTION_dump_vmstate:
+ if (vmstate_dump_file) {
+ error_report("only one '-dump-vmstate' "
+ "option may be given");
+ exit(1);
+ }
+ vmstate_dump_file = fopen(optarg, "w");
+ if (vmstate_dump_file == NULL) {
+ error_report("open %s: %s", optarg, strerror(errno));
+ exit(1);
+ }
+ break;
+ case QEMU_OPTION_enable_sync_profile:
+ qsp_enable();
+ break;
+ case QEMU_OPTION_nouserconfig:
+                /*
+                 * Nothing to parse here; in particular, do not hit
+                 * the "not supported" error in the default case below.
+                 */
+ break;
+#if defined(CONFIG_POSIX)
+ case QEMU_OPTION_runas:
+ if (!os_set_runas(optarg)) {
+ error_report("User \"%s\" doesn't exist"
+ " (and is not <uid>:<gid>)",
+ optarg);
+ exit(1);
+ }
+ break;
+ case QEMU_OPTION_chroot:
+ warn_report("option is deprecated,"
+ " use '-run-with chroot=...' instead");
+ os_set_chroot(optarg);
+ break;
+ case QEMU_OPTION_daemonize:
+ os_set_daemonize(true);
+ break;
+#if defined(CONFIG_LINUX)
+ /* deprecated */
+ case QEMU_OPTION_asyncteardown:
+ init_async_teardown();
+ break;
+#endif
+ case QEMU_OPTION_run_with: {
+ const char *str;
+ opts = qemu_opts_parse_noisily(qemu_find_opts("run-with"),
+ optarg, false);
+ if (!opts) {
+ exit(1);
+ }
+#if defined(CONFIG_LINUX)
+ if (qemu_opt_get_bool(opts, "async-teardown", false)) {
+ init_async_teardown();
+ }
+#endif
+ str = qemu_opt_get(opts, "chroot");
+ if (str) {
+ os_set_chroot(str);
+ }
+ break;
+ }
+#endif /* CONFIG_POSIX */
+
+ default:
+ error_report("Option not supported in this build");
+ exit(1);
+ }
+ }
+ }
+ /*
+ * Clear error location left behind by the loop.
+ * Best done right after the loop. Do not insert code here!
+ */
+ loc_set_none();
+
+ qemu_validate_options(machine_opts_dict);
+ qemu_process_sugar_options();
+
+ /*
+ * These options affect everything else and should be processed
+ * before daemonizing.
+ */
+ qemu_process_early_options();
+
+ qemu_process_help_options();
+ qemu_maybe_daemonize(pid_file);
+
+ /*
+ * The trace backend must be initialized after daemonizing.
+     * If trace_init_backends() ran before daemonizing, st_init() would
+     * create the trace thread in the parent and register
+     * st_flush_trace_buffer() in atexit(). That handler would make the
+     * parent wait for a writeout thread that never finishes, leaving
+     * the parent process hanging on the host instead of exiting.
+ */
+ if (!trace_init_backends()) {
+ exit(1);
+ }
+ trace_init_file();
+
+ qemu_init_main_loop(&error_fatal);
+ cpu_timers_init();
+
+ user_register_global_props();
+ replay_configure(icount_opts);
+
+ configure_rtc(qemu_find_opts_singleton("rtc"));
+
+ /* Transfer QemuOpts options into machine options */
+ parse_memory_options();
+
+ qemu_create_machine(machine_opts_dict);
+
+ suspend_mux_open();
+
+ qemu_disable_default_devices();
+ qemu_create_default_devices();
+ qemu_create_early_backends();
+
+ qemu_apply_legacy_machine_options(machine_opts_dict);
+ qemu_apply_machine_options(machine_opts_dict);
+ qobject_unref(machine_opts_dict);
+ phase_advance(PHASE_MACHINE_CREATED);
+
+ /*
+ * Note: uses machine properties such as kernel-irqchip, must run
+ * after qemu_apply_machine_options.
+ */
+ configure_accelerators(argv[0]);
+ phase_advance(PHASE_ACCEL_CREATED);
+
+ /*
+ * Beware, QOM objects created before this point miss global and
+ * compat properties.
+ *
+     * Global properties get set up by qdev_prop_register_global(),
+     * called from user_register_global_props() and from certain
+     * option desugaring, as well as by CPU feature desugaring
+     * (buried in parse_cpu_option()). The latter happens below this
+     * point, but may only target the CPU type, which cannot be
+     * created before parse_cpu_option() has returned that type.
+ *
+ * Machine compat properties: object_set_machine_compat_props().
+ * Accelerator compat props: object_set_accelerator_compat_props(),
+ * called from do_configure_accelerator().
+ */
+
+ machine_class = MACHINE_GET_CLASS(current_machine);
+ if (!qtest_enabled() && machine_class->deprecation_reason) {
+ warn_report("Machine type '%s' is deprecated: %s",
+ machine_class->name, machine_class->deprecation_reason);
+ }
+
+ /*
+     * Create backends before creating migration objects, so that
+     * migration can check the backend memories for compatibility
+     * (e.g. postcopy over memory-backend-file objects).
+ */
+ qemu_create_late_backends();
+
+ /*
+ * Note: creates a QOM object, must run only after global and
+ * compat properties have been set up.
+ */
+ migration_object_init();
+
+ /* parse features once if machine provides default cpu_type */
+ current_machine->cpu_type = machine_class->default_cpu_type;
+ if (cpu_option) {
+ current_machine->cpu_type = parse_cpu_option(cpu_option);
+ }
+ /* NB: for machine none cpu_type could STILL be NULL here! */
+
+ qemu_resolve_machine_memdev();
+ parse_numa_opts(current_machine);
+
+ if (vmstate_dump_file) {
+ /* dump and exit */
+ module_load_qom_all();
+ dump_vmstate_json_to_file(vmstate_dump_file);
+ exit(0);
+ }
+
+ if (!preconfig_requested) {
+ qmp_x_exit_preconfig(&error_fatal);
+ }
+ qemu_init_displays();
+ accel_setup_post(current_machine);
+ os_setup_post();
+ resume_mux_open();
+}
diff --git a/system/watchpoint.c b/system/watchpoint.c
new file mode 100644
index 0000000..45d1f12
--- /dev/null
+++ b/system/watchpoint.c
@@ -0,0 +1,226 @@
+/*
+ * CPU watchpoints
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "qemu/error-report.h"
+#include "exec/exec-all.h"
+#include "exec/translate-all.h"
+#include "sysemu/tcg.h"
+#include "sysemu/replay.h"
+#include "hw/core/tcg-cpu-ops.h"
+#include "hw/core/cpu.h"
+
+/* Add a watchpoint. */
+int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
+ int flags, CPUWatchpoint **watchpoint)
+{
+ CPUWatchpoint *wp;
+ vaddr in_page;
+
+ /* forbid ranges which are empty or run off the end of the address space */
+ if (len == 0 || (addr + len - 1) < addr) {
+ error_report("tried to set invalid watchpoint at %"
+ VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
+ return -EINVAL;
+ }
+ wp = g_malloc(sizeof(*wp));
+
+ wp->vaddr = addr;
+ wp->len = len;
+ wp->flags = flags;
+
+ /* keep all GDB-injected watchpoints in front */
+ if (flags & BP_GDB) {
+ QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
+ } else {
+ QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
+ }
+
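+    /*
+     * -(addr | TARGET_PAGE_MASK) is the number of bytes from addr to
+     * the end of its page; if the watched range fits in that page,
+     * flushing its TLB entry is enough, otherwise flush everything.
+     */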
+ in_page = -(addr | TARGET_PAGE_MASK);
+ if (len <= in_page) {
+ tlb_flush_page(cpu, addr);
+ } else {
+ tlb_flush(cpu);
+ }
+
+ if (watchpoint) {
+ *watchpoint = wp;
+ }
+ return 0;
+}
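+
+/*
+ * Illustrative sketch (not part of this file): a caller such as the
+ * gdbstub might guard a 4-byte guest variable roughly like this,
+ * where "guest_addr" is a placeholder address:
+ *
+ *     CPUWatchpoint *wp;
+ *     if (cpu_watchpoint_insert(cpu, guest_addr, 4,
+ *                               BP_MEM_WRITE | BP_GDB, &wp) == 0) {
+ *         ... run the guest; a hit sets the *_HIT bits in wp->flags ...
+ *         cpu_watchpoint_remove_by_ref(cpu, wp);
+ *     }
+ */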
+
+/* Remove a specific watchpoint. */
+int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
+ int flags)
+{
+ CPUWatchpoint *wp;
+
+ QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
+ if (addr == wp->vaddr && len == wp->len
+ && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
+ cpu_watchpoint_remove_by_ref(cpu, wp);
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
+/* Remove a specific watchpoint by reference. */
+void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
+{
+ QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
+
+ tlb_flush_page(cpu, watchpoint->vaddr);
+
+ g_free(watchpoint);
+}
+
+/* Remove all matching watchpoints. */
+void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
+{
+ CPUWatchpoint *wp, *next;
+
+ QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
+ if (wp->flags & mask) {
+ cpu_watchpoint_remove_by_ref(cpu, wp);
+ }
+ }
+}
+
+#ifdef CONFIG_TCG
+
+/*
+ * Return true if this watchpoint address matches the specified
+ * access (ie the address range covered by the watchpoint overlaps
+ * partially or completely with the address range covered by the
+ * access).
+ */
+static inline bool watchpoint_address_matches(CPUWatchpoint *wp,
+ vaddr addr, vaddr len)
+{
+ /*
+ * We know the lengths are non-zero, but a little caution is
+ * required to avoid errors in the case where the range ends
+ * exactly at the top of the address space and so addr + len
+ * wraps round to zero.
+ */
+ vaddr wpend = wp->vaddr + wp->len - 1;
+ vaddr addrend = addr + len - 1;
+
+ return !(addr > wpend || wp->vaddr > addrend);
+}
+
+/* Return the combined flags of watchpoints overlapping [addr, addr + len). */
+int cpu_watchpoint_address_matches(CPUState *cpu, vaddr addr, vaddr len)
+{
+ CPUWatchpoint *wp;
+ int ret = 0;
+
+ QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
+ if (watchpoint_address_matches(wp, addr, len)) {
+ ret |= wp->flags;
+ }
+ }
+ return ret;
+}
+
+/* Generate a debug exception if a watchpoint has been hit. */
+void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
+ MemTxAttrs attrs, int flags, uintptr_t ra)
+{
+ CPUClass *cc = CPU_GET_CLASS(cpu);
+ CPUWatchpoint *wp;
+
+ assert(tcg_enabled());
+ if (cpu->watchpoint_hit) {
+ /*
+ * We re-entered the check after replacing the TB.
+ * Now raise the debug interrupt so that it will
+ * trigger after the current instruction.
+ */
+ qemu_mutex_lock_iothread();
+ cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
+ qemu_mutex_unlock_iothread();
+ return;
+ }
+
+ if (cc->tcg_ops->adjust_watchpoint_address) {
+ /* this is currently used only by ARM BE32 */
+ addr = cc->tcg_ops->adjust_watchpoint_address(cpu, addr, len);
+ }
+
+ assert((flags & ~BP_MEM_ACCESS) == 0);
+ QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
+ int hit_flags = wp->flags & flags;
+
+ if (hit_flags && watchpoint_address_matches(wp, addr, len)) {
+ if (replay_running_debug()) {
+                /*
+                 * replay_breakpoint() reads icount, which can only be
+                 * read reliably at the end of a translation block;
+                 * force a recompile so that the read succeeds.
+                 */
+ if (!cpu->neg.can_do_io) {
+ /* Force execution of one insn next time. */
+ cpu->cflags_next_tb = 1 | CF_LAST_IO | CF_NOIRQ
+ | curr_cflags(cpu);
+ cpu_loop_exit_restore(cpu, ra);
+ }
+ /*
+ * Don't process the watchpoints when we are
+ * in a reverse debugging operation.
+ */
+ replay_breakpoint();
+ return;
+ }
+
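+            /*
+             * Record which access kinds hit: shift the matched
+             * BP_MEM_* bits into the corresponding
+             * BP_WATCHPOINT_HIT_* bits.
+             */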
+ wp->flags |= hit_flags << BP_HIT_SHIFT;
+ wp->hitaddr = MAX(addr, wp->vaddr);
+ wp->hitattrs = attrs;
+
+ if (wp->flags & BP_CPU
+ && cc->tcg_ops->debug_check_watchpoint
+ && !cc->tcg_ops->debug_check_watchpoint(cpu, wp)) {
+ wp->flags &= ~BP_WATCHPOINT_HIT;
+ continue;
+ }
+ cpu->watchpoint_hit = wp;
+
+ mmap_lock();
+ /* This call also restores vCPU state */
+ tb_check_watchpoint(cpu, ra);
+ if (wp->flags & BP_STOP_BEFORE_ACCESS) {
+ cpu->exception_index = EXCP_DEBUG;
+ mmap_unlock();
+ cpu_loop_exit(cpu);
+ } else {
+ /* Force execution of one insn next time. */
+ cpu->cflags_next_tb = 1 | CF_LAST_IO | CF_NOIRQ
+ | curr_cflags(cpu);
+ mmap_unlock();
+ cpu_loop_exit_noexc(cpu);
+ }
+ } else {
+ wp->flags &= ~BP_WATCHPOINT_HIT;
+ }
+ }
+}
+
+#endif /* CONFIG_TCG */