Diffstat (limited to 'accel')
-rw-r--r--  accel/Kconfig                          3
-rw-r--r--  accel/accel-common.c                 144
-rw-r--r--  accel/accel-internal.h (renamed from accel/accel-system.h)  10
-rw-r--r--  accel/accel-irq.c                    106
-rw-r--r--  accel/accel-qmp.c                     35
-rw-r--r--  accel/accel-system.c                  32
-rw-r--r--  accel/accel-target.c                 134
-rw-r--r--  accel/accel-user.c                     6
-rw-r--r--  accel/dummy-cpus.c                     3
-rw-r--r--  accel/dummy-cpus.h                    14
-rw-r--r--  accel/hvf/hvf-accel-ops.c            333
-rw-r--r--  accel/hvf/hvf-all.c                  271
-rw-r--r--  accel/hvf/trace-events                 7
-rw-r--r--  accel/hvf/trace.h                      2
-rw-r--r--  accel/kvm/kvm-accel-ops.c              8
-rw-r--r--  accel/kvm/kvm-all.c                  221
-rw-r--r--  accel/meson.build                      4
-rw-r--r--  accel/mshv/irq.c                     399
-rw-r--r--  accel/mshv/mem.c                     563
-rw-r--r--  accel/mshv/meson.build                 9
-rw-r--r--  accel/mshv/mshv-all.c                727
-rw-r--r--  accel/mshv/msr.c                     375
-rw-r--r--  accel/mshv/trace-events               33
-rw-r--r--  accel/mshv/trace.h                    14
-rw-r--r--  accel/qtest/qtest.c                   11
-rw-r--r--  accel/stubs/hvf-stub.c                12
-rw-r--r--  accel/stubs/kvm-stub.c                 9
-rw-r--r--  accel/stubs/meson.build                4
-rw-r--r--  accel/stubs/mshv-stub.c               44
-rw-r--r--  accel/stubs/nvmm-stub.c               12
-rw-r--r--  accel/stubs/tcg-stub.c                 8
-rw-r--r--  accel/stubs/whpx-stub.c               12
-rw-r--r--  accel/tcg/atomic_common.c.inc          9
-rw-r--r--  accel/tcg/atomic_template.h           96
-rw-r--r--  accel/tcg/backend-ldst.h              41
-rw-r--r--  accel/tcg/cpu-exec.c                 233
-rw-r--r--  accel/tcg/cputlb.c                   246
-rw-r--r--  accel/tcg/icount-common.c              2
-rw-r--r--  accel/tcg/internal-common.h           73
-rw-r--r--  accel/tcg/internal-target.h           79
-rw-r--r--  accel/tcg/ldst_common.c.inc          339
-rw-r--r--  accel/tcg/meson.build                 32
-rw-r--r--  accel/tcg/monitor.c                  208
-rw-r--r--  accel/tcg/plugin-gen.c                47
-rw-r--r--  accel/tcg/tb-hash.h                    4
-rw-r--r--  accel/tcg/tb-internal.h               46
-rw-r--r--  accel/tcg/tb-maint.c                 160
-rw-r--r--  accel/tcg/tcg-accel-ops-icount.c       2
-rw-r--r--  accel/tcg/tcg-accel-ops-mttcg.c       15
-rw-r--r--  accel/tcg/tcg-accel-ops-mttcg.h        3
-rw-r--r--  accel/tcg/tcg-accel-ops-rr.c          57
-rw-r--r--  accel/tcg/tcg-accel-ops.c             19
-rw-r--r--  accel/tcg/tcg-accel-ops.h              1
-rw-r--r--  accel/tcg/tcg-all.c                  133
-rw-r--r--  accel/tcg/tcg-runtime.h               12
-rw-r--r--  accel/tcg/tcg-stats.c                219
-rw-r--r--  accel/tcg/tlb-bounds.h                13
-rw-r--r--  accel/tcg/translate-all.c            113
-rw-r--r--  accel/tcg/translator.c               135
-rw-r--r--  accel/tcg/user-exec.c                378
-rw-r--r--  accel/tcg/watchpoint.c                 3
-rw-r--r--  accel/xen/xen-all.c                   13
62 files changed, 4272 insertions, 2024 deletions
diff --git a/accel/Kconfig b/accel/Kconfig
index 4263cab..a60f114 100644
--- a/accel/Kconfig
+++ b/accel/Kconfig
@@ -13,6 +13,9 @@ config TCG
config KVM
bool
+config MSHV
+ bool
+
config XEN
bool
select FSDEV_9P if VIRTFS
diff --git a/accel/accel-common.c b/accel/accel-common.c
new file mode 100644
index 0000000..850c5ab
--- /dev/null
+++ b/accel/accel-common.c
@@ -0,0 +1,144 @@
+/*
+ * QEMU accel class, components common to system emulation and user mode
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2014 Red Hat Inc.
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/accel.h"
+#include "qemu/target-info.h"
+#include "accel/accel-ops.h"
+#include "accel/accel-cpu.h"
+#include "accel/accel-cpu-ops.h"
+#include "accel-internal.h"
+
+/* Lookup AccelClass from opt_name. Returns NULL if not found */
+AccelClass *accel_find(const char *opt_name)
+{
+ char *class_name = g_strdup_printf(ACCEL_CLASS_NAME("%s"), opt_name);
+ AccelClass *ac = ACCEL_CLASS(module_object_class_by_name(class_name));
+ g_free(class_name);
+ return ac;
+}
+
+/* Return the name of the current accelerator */
+const char *current_accel_name(void)
+{
+ AccelClass *ac = ACCEL_GET_CLASS(current_accel());
+
+ return ac->name;
+}
+
+static void accel_init_cpu_int_aux(ObjectClass *klass, void *opaque)
+{
+ CPUClass *cc = CPU_CLASS(klass);
+ AccelCPUClass *accel_cpu = opaque;
+
+ /*
+ * The first callback allows accel-cpu to run initializations
+ * for the CPU, customizing CPU behavior according to the accelerator.
+ *
+ * The second one allows the CPU to customize the accel-cpu
+ * behavior according to the CPU.
+ *
+ * The second is currently only used by TCG, to specialize the
+ * TCGCPUOps depending on the CPU type.
+ */
+ cc->accel_cpu = accel_cpu;
+ if (accel_cpu->cpu_class_init) {
+ accel_cpu->cpu_class_init(cc);
+ }
+ if (cc->init_accel_cpu) {
+ cc->init_accel_cpu(accel_cpu, cc);
+ }
+}
+
+/* initialize the arch-specific accel CpuClass interfaces */
+static void accel_init_cpu_interfaces(AccelClass *ac)
+{
+ const char *ac_name; /* AccelClass name */
+ char *acc_name; /* AccelCPUClass name */
+ ObjectClass *acc; /* AccelCPUClass */
+ const char *cpu_resolving_type = target_cpu_type();
+
+ ac_name = object_class_get_name(OBJECT_CLASS(ac));
+ g_assert(ac_name != NULL);
+
+ acc_name = g_strdup_printf("%s-%s", ac_name, cpu_resolving_type);
+ acc = object_class_by_name(acc_name);
+ g_free(acc_name);
+
+ if (acc) {
+ object_class_foreach(accel_init_cpu_int_aux,
+ cpu_resolving_type, false, acc);
+ }
+}
+
+void accel_init_interfaces(AccelClass *ac)
+{
+ accel_init_ops_interfaces(ac);
+ accel_init_cpu_interfaces(ac);
+}
+
+void accel_cpu_instance_init(CPUState *cpu)
+{
+ if (cpu->cc->accel_cpu && cpu->cc->accel_cpu->cpu_instance_init) {
+ cpu->cc->accel_cpu->cpu_instance_init(cpu);
+ }
+}
+
+bool accel_cpu_common_realize(CPUState *cpu, Error **errp)
+{
+ AccelState *accel = current_accel();
+ AccelClass *acc = ACCEL_GET_CLASS(accel);
+
+ /* target specific realization */
+ if (cpu->cc->accel_cpu
+ && cpu->cc->accel_cpu->cpu_target_realize
+ && !cpu->cc->accel_cpu->cpu_target_realize(cpu, errp)) {
+ return false;
+ }
+
+ /* generic realization */
+ if (acc->cpu_common_realize && !acc->cpu_common_realize(cpu, errp)) {
+ return false;
+ }
+
+ return true;
+}
+
+void accel_cpu_common_unrealize(CPUState *cpu)
+{
+ AccelState *accel = current_accel();
+ AccelClass *acc = ACCEL_GET_CLASS(accel);
+
+ /* generic unrealization */
+ if (acc->cpu_common_unrealize) {
+ acc->cpu_common_unrealize(cpu);
+ }
+}
+
+int accel_supported_gdbstub_sstep_flags(void)
+{
+ AccelState *accel = current_accel();
+ AccelClass *acc = ACCEL_GET_CLASS(accel);
+ if (acc->gdbstub_supported_sstep_flags) {
+ return acc->gdbstub_supported_sstep_flags(accel);
+ }
+ return 0;
+}
+
+static const TypeInfo accel_types[] = {
+ {
+ .name = TYPE_ACCEL,
+ .parent = TYPE_OBJECT,
+ .class_size = sizeof(AccelClass),
+ .instance_size = sizeof(AccelState),
+ .abstract = true,
+ },
+};
+
+DEFINE_TYPES(accel_types)
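Both lookups in the file above are purely name-driven QOM resolutions. A minimal sketch of the naming scheme, assuming the macros from include/qemu/accel.h and include/accel/accel-cpu-target.h:

    #define TYPE_ACCEL          "accel"
    #define ACCEL_CLASS_NAME(a) (a "-" TYPE_ACCEL)

    /*
     * accel_find("kvm") therefore resolves the class "kvm-accel", and
     * accel_init_cpu_interfaces() appends the CPU type to the same prefix,
     * probing e.g. "kvm-accel-x86_64-cpu" on an x86_64 target for the
     * optional AccelCPUClass specialization.
     */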
diff --git a/accel/accel-system.h b/accel/accel-internal.h
index 2d37c73..d3a4422 100644
--- a/accel/accel-system.h
+++ b/accel/accel-internal.h
@@ -1,5 +1,5 @@
/*
- * QEMU System Emulation accel internal functions
+ * QEMU accel internal functions
*
* Copyright 2021 SUSE LLC
*
@@ -7,9 +7,11 @@
* See the COPYING file in the top-level directory.
*/
-#ifndef ACCEL_SYSTEM_H
-#define ACCEL_SYSTEM_H
+#ifndef ACCEL_INTERNAL_H
+#define ACCEL_INTERNAL_H
-void accel_system_init_ops_interfaces(AccelClass *ac);
+#include "qemu/accel.h"
+
+void accel_init_ops_interfaces(AccelClass *ac);
-#endif /* ACCEL_SYSTEM_H */
+#endif /* ACCEL_INTERNAL_H */
diff --git a/accel/accel-irq.c b/accel/accel-irq.c
new file mode 100644
index 0000000..7f864e3
--- /dev/null
+++ b/accel/accel-irq.c
@@ -0,0 +1,106 @@
+/*
+ * Accelerated irqchip abstraction
+ *
+ * Copyright Microsoft, Corp. 2025
+ *
+ * Authors: Ziqiao Zhou <ziqiaozhou@microsoft.com>
+ * Magnus Kulke <magnuskulke@microsoft.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "hw/pci/msi.h"
+
+#include "system/kvm.h"
+#include "system/mshv.h"
+#include "system/accel-irq.h"
+
+int accel_irqchip_add_msi_route(KVMRouteChange *c, int vector, PCIDevice *dev)
+{
+#ifdef CONFIG_MSHV_IS_POSSIBLE
+ if (mshv_msi_via_irqfd_enabled()) {
+ return mshv_irqchip_add_msi_route(vector, dev);
+ }
+#endif
+ if (kvm_enabled()) {
+ return kvm_irqchip_add_msi_route(c, vector, dev);
+ }
+ return -ENOSYS;
+}
+
+int accel_irqchip_update_msi_route(int vector, MSIMessage msg, PCIDevice *dev)
+{
+#ifdef CONFIG_MSHV_IS_POSSIBLE
+ if (mshv_msi_via_irqfd_enabled()) {
+ return mshv_irqchip_update_msi_route(vector, msg, dev);
+ }
+#endif
+ if (kvm_enabled()) {
+ return kvm_irqchip_update_msi_route(kvm_state, vector, msg, dev);
+ }
+ return -ENOSYS;
+}
+
+void accel_irqchip_commit_route_changes(KVMRouteChange *c)
+{
+#ifdef CONFIG_MSHV_IS_POSSIBLE
+ if (mshv_msi_via_irqfd_enabled()) {
+ mshv_irqchip_commit_routes();
+ }
+#endif
+ if (kvm_enabled()) {
+ kvm_irqchip_commit_route_changes(c);
+ }
+}
+
+void accel_irqchip_commit_routes(void)
+{
+#ifdef CONFIG_MSHV_IS_POSSIBLE
+ if (mshv_msi_via_irqfd_enabled()) {
+ mshv_irqchip_commit_routes();
+ }
+#endif
+ if (kvm_enabled()) {
+ kvm_irqchip_commit_routes(kvm_state);
+ }
+}
+
+void accel_irqchip_release_virq(int virq)
+{
+#ifdef CONFIG_MSHV_IS_POSSIBLE
+ if (mshv_msi_via_irqfd_enabled()) {
+ mshv_irqchip_release_virq(virq);
+ }
+#endif
+ if (kvm_enabled()) {
+ kvm_irqchip_release_virq(kvm_state, virq);
+ }
+}
+
+int accel_irqchip_add_irqfd_notifier_gsi(EventNotifier *n, EventNotifier *rn,
+ int virq)
+{
+#ifdef CONFIG_MSHV_IS_POSSIBLE
+ if (mshv_msi_via_irqfd_enabled()) {
+ return mshv_irqchip_add_irqfd_notifier_gsi(n, rn, virq);
+ }
+#endif
+ if (kvm_enabled()) {
+ return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, rn, virq);
+ }
+ return -ENOSYS;
+}
+
+int accel_irqchip_remove_irqfd_notifier_gsi(EventNotifier *n, int virq)
+{
+#ifdef CONFIG_MSHV_IS_POSSIBLE
+ if (mshv_msi_via_irqfd_enabled()) {
+ return mshv_irqchip_remove_irqfd_notifier_gsi(n, virq);
+ }
+#endif
+ if (kvm_enabled()) {
+ return kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, virq);
+ }
+ return -ENOSYS;
+}
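The wrappers above keep MSI consumers accelerator-neutral by dispatching to MSHV when its irqfd path is enabled and to KVM otherwise. A hypothetical caller sketch, mirroring the existing kvm_irqchip_* sequence (the KVMRouteChange c, notifier n, vector and dev are assumed to be set up by the device code):

    int virq = accel_irqchip_add_msi_route(&c, vector, dev);
    if (virq < 0) {
        return virq;    /* -ENOSYS if neither KVM nor MSHV is active */
    }
    accel_irqchip_commit_route_changes(&c);
    accel_irqchip_add_irqfd_notifier_gsi(&n, NULL, virq);

    /* ... the kernel now injects MSIs directly from the eventfd ... */

    accel_irqchip_remove_irqfd_notifier_gsi(&n, virq);
    accel_irqchip_release_virq(virq);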
diff --git a/accel/accel-qmp.c b/accel/accel-qmp.c
new file mode 100644
index 0000000..5fb70c6
--- /dev/null
+++ b/accel/accel-qmp.c
@@ -0,0 +1,35 @@
+/*
+ * QMP commands related to accelerators
+ *
+ * Copyright (c) Linaro
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/accel.h"
+#include "qapi/type-helpers.h"
+#include "qapi/qapi-commands-accelerator.h"
+#include "accel/accel-ops.h"
+#include "accel/accel-cpu-ops.h"
+#include "hw/core/cpu.h"
+
+HumanReadableText *qmp_x_accel_stats(Error **errp)
+{
+ AccelState *accel = current_accel();
+ AccelClass *acc = ACCEL_GET_CLASS(accel);
+ g_autoptr(GString) buf = g_string_new("");
+
+ if (acc->get_stats) {
+ acc->get_stats(accel, buf);
+ }
+ if (acc->ops->get_vcpu_stats) {
+ CPUState *cpu;
+
+ CPU_FOREACH(cpu) {
+ acc->ops->get_vcpu_stats(cpu, buf);
+ }
+ }
+
+ return human_readable_text_from_str(buf);
+}
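An accelerator opts into this command simply by filling in the hooks. A sketch with a purely illustrative "foo" accelerator (the get_stats signature is inferred from the call site above):

    static uint64_t foo_exit_count;

    static void foo_get_stats(AccelState *as, GString *buf)
    {
        g_string_append_printf(buf, "foo VM exits: %" PRIu64 "\n",
                               foo_exit_count);
    }

    static void foo_accel_class_init(ObjectClass *oc, const void *data)
    {
        ACCEL_CLASS(oc)->get_stats = foo_get_stats;
    }

The output is then reachable as QMP x-accel-stats and, via the HMP registration added to accel-system.c below, as "info accel".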
diff --git a/accel/accel-system.c b/accel/accel-system.c
index 5df49fb..1e97c64 100644
--- a/accel/accel-system.c
+++ b/accel/accel-system.c
@@ -25,11 +25,15 @@
#include "qemu/osdep.h"
#include "qemu/accel.h"
+#include "qapi/qapi-commands-accelerator.h"
+#include "monitor/monitor.h"
#include "hw/boards.h"
-#include "system/accel-ops.h"
+#include "hw/core/cpu.h"
+#include "accel/accel-ops.h"
+#include "accel/accel-cpu-ops.h"
#include "system/cpus.h"
#include "qemu/error-report.h"
-#include "accel-system.h"
+#include "accel-internal.h"
int accel_init_machine(AccelState *accel, MachineState *ms)
{
@@ -37,7 +41,7 @@ int accel_init_machine(AccelState *accel, MachineState *ms)
int ret;
ms->accelerator = accel;
*(acc->allowed) = true;
- ret = acc->init_machine(ms);
+ ret = acc->init_machine(accel, ms);
if (ret < 0) {
ms->accelerator = NULL;
*(acc->allowed) = false;
@@ -58,12 +62,21 @@ void accel_setup_post(MachineState *ms)
AccelState *accel = ms->accelerator;
AccelClass *acc = ACCEL_GET_CLASS(accel);
if (acc->setup_post) {
- acc->setup_post(ms, accel);
+ acc->setup_post(accel);
+ }
+}
+
+void accel_pre_resume(MachineState *ms, bool step_pending)
+{
+ AccelState *accel = ms->accelerator;
+ AccelClass *acc = ACCEL_GET_CLASS(accel);
+ if (acc->pre_resume_vm) {
+ acc->pre_resume_vm(accel, step_pending);
}
}
/* initialize the arch-independent accel operation interfaces */
-void accel_system_init_ops_interfaces(AccelClass *ac)
+void accel_init_ops_interfaces(AccelClass *ac)
{
const char *ac_name;
char *ops_name;
@@ -85,17 +98,24 @@ void accel_system_init_ops_interfaces(AccelClass *ac)
* non-NULL create_vcpu_thread operation.
*/
ops = ACCEL_OPS_CLASS(oc);
+ ac->ops = ops;
if (ops->ops_init) {
- ops->ops_init(ops);
+ ops->ops_init(ac);
}
cpus_register_accel(ops);
}
+static void accel_ops_class_init(ObjectClass *oc, const void *data)
+{
+ monitor_register_hmp_info_hrt("accel", qmp_x_accel_stats);
+}
+
static const TypeInfo accel_ops_type_info = {
.name = TYPE_ACCEL_OPS,
.parent = TYPE_OBJECT,
.abstract = true,
.class_size = sizeof(AccelOpsClass),
+ .class_init = accel_ops_class_init,
};
static void accel_system_register_types(void)
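The ops lookup in accel_init_ops_interfaces() is again name-based: it appends "-ops" to the AccelClass name. How the pieces line up, assuming the definitions in accel/accel-ops.h:

    #define ACCEL_OPS_SUFFIX    "-ops"
    #define TYPE_ACCEL_OPS      "accel" ACCEL_OPS_SUFFIX
    #define ACCEL_OPS_NAME(a)   (a "-" TYPE_ACCEL_OPS)

    /*
     * "kvm-accel" + "-ops" == ACCEL_OPS_NAME("kvm") == "kvm-accel-ops",
     * so the g_strdup_printf("%s-ops", ac_name) lookup lands on the ops
     * class each accelerator registers, cf. ACCEL_OPS_NAME("hvf") below.
     */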
diff --git a/accel/accel-target.c b/accel/accel-target.c
index 33a539b..7fd392f 100644
--- a/accel/accel-target.c
+++ b/accel/accel-target.c
@@ -24,141 +24,8 @@
*/
#include "qemu/osdep.h"
-#include "qemu/accel.h"
-
-#include "cpu.h"
#include "accel/accel-cpu-target.h"
-#ifndef CONFIG_USER_ONLY
-#include "accel-system.h"
-#endif /* !CONFIG_USER_ONLY */
-
-static const TypeInfo accel_type = {
- .name = TYPE_ACCEL,
- .parent = TYPE_OBJECT,
- .class_size = sizeof(AccelClass),
- .instance_size = sizeof(AccelState),
- .abstract = true,
-};
-
-/* Lookup AccelClass from opt_name. Returns NULL if not found */
-AccelClass *accel_find(const char *opt_name)
-{
- char *class_name = g_strdup_printf(ACCEL_CLASS_NAME("%s"), opt_name);
- AccelClass *ac = ACCEL_CLASS(module_object_class_by_name(class_name));
- g_free(class_name);
- return ac;
-}
-
-/* Return the name of the current accelerator */
-const char *current_accel_name(void)
-{
- AccelClass *ac = ACCEL_GET_CLASS(current_accel());
-
- return ac->name;
-}
-
-static void accel_init_cpu_int_aux(ObjectClass *klass, void *opaque)
-{
- CPUClass *cc = CPU_CLASS(klass);
- AccelCPUClass *accel_cpu = opaque;
-
- /*
- * The first callback allows accel-cpu to run initializations
- * for the CPU, customizing CPU behavior according to the accelerator.
- *
- * The second one allows the CPU to customize the accel-cpu
- * behavior according to the CPU.
- *
- * The second is currently only used by TCG, to specialize the
- * TCGCPUOps depending on the CPU type.
- */
- cc->accel_cpu = accel_cpu;
- if (accel_cpu->cpu_class_init) {
- accel_cpu->cpu_class_init(cc);
- }
- if (cc->init_accel_cpu) {
- cc->init_accel_cpu(accel_cpu, cc);
- }
-}
-
-/* initialize the arch-specific accel CpuClass interfaces */
-static void accel_init_cpu_interfaces(AccelClass *ac)
-{
- const char *ac_name; /* AccelClass name */
- char *acc_name; /* AccelCPUClass name */
- ObjectClass *acc; /* AccelCPUClass */
-
- ac_name = object_class_get_name(OBJECT_CLASS(ac));
- g_assert(ac_name != NULL);
-
- acc_name = g_strdup_printf("%s-%s", ac_name, CPU_RESOLVING_TYPE);
- acc = object_class_by_name(acc_name);
- g_free(acc_name);
-
- if (acc) {
- object_class_foreach(accel_init_cpu_int_aux,
- CPU_RESOLVING_TYPE, false, acc);
- }
-}
-
-void accel_init_interfaces(AccelClass *ac)
-{
-#ifndef CONFIG_USER_ONLY
- accel_system_init_ops_interfaces(ac);
-#endif /* !CONFIG_USER_ONLY */
-
- accel_init_cpu_interfaces(ac);
-}
-
-void accel_cpu_instance_init(CPUState *cpu)
-{
- if (cpu->cc->accel_cpu && cpu->cc->accel_cpu->cpu_instance_init) {
- cpu->cc->accel_cpu->cpu_instance_init(cpu);
- }
-}
-
-bool accel_cpu_common_realize(CPUState *cpu, Error **errp)
-{
- AccelState *accel = current_accel();
- AccelClass *acc = ACCEL_GET_CLASS(accel);
-
- /* target specific realization */
- if (cpu->cc->accel_cpu
- && cpu->cc->accel_cpu->cpu_target_realize
- && !cpu->cc->accel_cpu->cpu_target_realize(cpu, errp)) {
- return false;
- }
-
- /* generic realization */
- if (acc->cpu_common_realize && !acc->cpu_common_realize(cpu, errp)) {
- return false;
- }
-
- return true;
-}
-
-void accel_cpu_common_unrealize(CPUState *cpu)
-{
- AccelState *accel = current_accel();
- AccelClass *acc = ACCEL_GET_CLASS(accel);
-
- /* generic unrealization */
- if (acc->cpu_common_unrealize) {
- acc->cpu_common_unrealize(cpu);
- }
-}
-
-int accel_supported_gdbstub_sstep_flags(void)
-{
- AccelState *accel = current_accel();
- AccelClass *acc = ACCEL_GET_CLASS(accel);
- if (acc->gdbstub_supported_sstep_flags) {
- return acc->gdbstub_supported_sstep_flags();
- }
- return 0;
-}
-
static const TypeInfo accel_cpu_type = {
.name = TYPE_ACCEL_CPU,
.parent = TYPE_OBJECT,
@@ -168,7 +35,6 @@ static const TypeInfo accel_cpu_type = {
static void register_accel_types(void)
{
- type_register_static(&accel_type);
type_register_static(&accel_cpu_type);
}
diff --git a/accel/accel-user.c b/accel/accel-user.c
index 22b6a1a..7d19230 100644
--- a/accel/accel-user.c
+++ b/accel/accel-user.c
@@ -9,6 +9,12 @@
#include "qemu/osdep.h"
#include "qemu/accel.h"
+#include "accel-internal.h"
+
+void accel_init_ops_interfaces(AccelClass *ac)
+{
+ /* nothing */
+}
AccelState *current_accel(void)
{
diff --git a/accel/dummy-cpus.c b/accel/dummy-cpus.c
index 8672761..5752f63 100644
--- a/accel/dummy-cpus.c
+++ b/accel/dummy-cpus.c
@@ -17,6 +17,7 @@
#include "qemu/guest-random.h"
#include "qemu/main-loop.h"
#include "hw/core/cpu.h"
+#include "accel/dummy-cpus.h"
static void *dummy_cpu_thread_fn(void *arg)
{
@@ -42,6 +43,7 @@ static void *dummy_cpu_thread_fn(void *arg)
qemu_guest_random_seed_thread_part2(cpu->random_seed);
do {
+ qemu_process_cpu_events(cpu);
bql_unlock();
#ifndef _WIN32
do {
@@ -56,7 +58,6 @@ static void *dummy_cpu_thread_fn(void *arg)
qemu_sem_wait(&cpu->sem);
#endif
bql_lock();
- qemu_wait_io_event(cpu);
} while (!cpu->unplug);
bql_unlock();
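This is the loop restructuring that recurs for HVF and KVM below: qemu_process_cpu_events() now runs at the top of each iteration instead of a trailing qemu_wait_io_event(). The common shape, sketched with a hypothetical accel_exec() placeholder for the accelerator-specific step:

    do {
        qemu_process_cpu_events(cpu);    /* wait for and service queued work */
        if (cpu_can_run(cpu)) {
            int r = accel_exec(cpu);     /* hypothetical per-accel exec step */
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
    } while (!cpu->unplug || cpu_can_run(cpu));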
diff --git a/accel/dummy-cpus.h b/accel/dummy-cpus.h
new file mode 100644
index 0000000..d18dd0f
--- /dev/null
+++ b/accel/dummy-cpus.h
@@ -0,0 +1,14 @@
+/*
+ * Dummy cpu thread code
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef ACCEL_DUMMY_CPUS_H
+#define ACCEL_DUMMY_CPUS_H
+
+void dummy_start_vcpu_thread(CPUState *cpu);
+
+#endif
diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c
index 12fc30c..8b794c2 100644
--- a/accel/hvf/hvf-accel-ops.c
+++ b/accel/hvf/hvf-accel-ops.c
@@ -48,18 +48,17 @@
*/
#include "qemu/osdep.h"
-#include "qemu/error-report.h"
+#include "qemu/guest-random.h"
#include "qemu/main-loop.h"
-#include "exec/address-spaces.h"
-#include "exec/exec-all.h"
+#include "qemu/queue.h"
#include "gdbstub/enums.h"
-#include "hw/boards.h"
-#include "system/accel-ops.h"
+#include "exec/cpu-common.h"
+#include "hw/core/cpu.h"
+#include "accel/accel-cpu-ops.h"
#include "system/cpus.h"
#include "system/hvf.h"
#include "system/hvf_int.h"
-#include "system/runstate.h"
-#include "qemu/guest-random.h"
+#include <mach/mach_time.h>
HVFState *hvf_state;
@@ -79,138 +78,17 @@ hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t size)
return NULL;
}
-struct mac_slot {
- int present;
- uint64_t size;
- uint64_t gpa_start;
- uint64_t gva;
-};
-
-struct mac_slot mac_slots[32];
-
-static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags)
-{
- struct mac_slot *macslot;
- hv_return_t ret;
-
- macslot = &mac_slots[slot->slot_id];
-
- if (macslot->present) {
- if (macslot->size != slot->size) {
- macslot->present = 0;
- ret = hv_vm_unmap(macslot->gpa_start, macslot->size);
- assert_hvf_ok(ret);
- }
- }
-
- if (!slot->size) {
- return 0;
- }
-
- macslot->present = 1;
- macslot->gpa_start = slot->start;
- macslot->size = slot->size;
- ret = hv_vm_map(slot->mem, slot->start, slot->size, flags);
- assert_hvf_ok(ret);
- return 0;
-}
-
-static void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
-{
- hvf_slot *mem;
- MemoryRegion *area = section->mr;
- bool writable = !area->readonly && !area->rom_device;
- hv_memory_flags_t flags;
- uint64_t page_size = qemu_real_host_page_size();
-
- if (!memory_region_is_ram(area)) {
- if (writable) {
- return;
- } else if (!memory_region_is_romd(area)) {
- /*
- * If the memory device is not in romd_mode, then we actually want
- * to remove the hvf memory slot so all accesses will trap.
- */
- add = false;
- }
- }
-
- if (!QEMU_IS_ALIGNED(int128_get64(section->size), page_size) ||
- !QEMU_IS_ALIGNED(section->offset_within_address_space, page_size)) {
- /* Not page aligned, so we can not map as RAM */
- add = false;
- }
-
- mem = hvf_find_overlap_slot(
- section->offset_within_address_space,
- int128_get64(section->size));
-
- if (mem && add) {
- if (mem->size == int128_get64(section->size) &&
- mem->start == section->offset_within_address_space &&
- mem->mem == (memory_region_get_ram_ptr(area) +
- section->offset_within_region)) {
- return; /* Same region was attempted to register, go away. */
- }
- }
-
- /* Region needs to be reset. set the size to 0 and remap it. */
- if (mem) {
- mem->size = 0;
- if (do_hvf_set_memory(mem, 0)) {
- error_report("Failed to reset overlapping slot");
- abort();
- }
- }
-
- if (!add) {
- return;
- }
-
- if (area->readonly ||
- (!memory_region_is_ram(area) && memory_region_is_romd(area))) {
- flags = HV_MEMORY_READ | HV_MEMORY_EXEC;
- } else {
- flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC;
- }
-
- /* Now make a new slot. */
- int x;
-
- for (x = 0; x < hvf_state->num_slots; ++x) {
- mem = &hvf_state->slots[x];
- if (!mem->size) {
- break;
- }
- }
-
- if (x == hvf_state->num_slots) {
- error_report("No free slots");
- abort();
- }
-
- mem->size = int128_get64(section->size);
- mem->mem = memory_region_get_ram_ptr(area) + section->offset_within_region;
- mem->start = section->offset_within_address_space;
- mem->region = area;
-
- if (do_hvf_set_memory(mem, flags)) {
- error_report("Error registering new memory slot");
- abort();
- }
-}
-
static void do_hvf_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
- if (!cpu->accel->dirty) {
+ if (!cpu->vcpu_dirty) {
hvf_get_registers(cpu);
- cpu->accel->dirty = true;
+ cpu->vcpu_dirty = true;
}
}
static void hvf_cpu_synchronize_state(CPUState *cpu)
{
- if (!cpu->accel->dirty) {
+ if (!cpu->vcpu_dirty) {
run_on_cpu(cpu, do_hvf_cpu_synchronize_state, RUN_ON_CPU_NULL);
}
}
@@ -219,7 +97,7 @@ static void do_hvf_cpu_synchronize_set_dirty(CPUState *cpu,
run_on_cpu_data arg)
{
/* QEMU state is the reference, push it to HVF now and on next entry */
- cpu->accel->dirty = true;
+ cpu->vcpu_dirty = true;
}
static void hvf_cpu_synchronize_post_reset(CPUState *cpu)
@@ -237,146 +115,16 @@ static void hvf_cpu_synchronize_pre_loadvm(CPUState *cpu)
run_on_cpu(cpu, do_hvf_cpu_synchronize_set_dirty, RUN_ON_CPU_NULL);
}
-static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on)
-{
- hvf_slot *slot;
-
- slot = hvf_find_overlap_slot(
- section->offset_within_address_space,
- int128_get64(section->size));
-
- /* protect region against writes; begin tracking it */
- if (on) {
- slot->flags |= HVF_SLOT_LOG;
- hv_vm_protect((uintptr_t)slot->start, (size_t)slot->size,
- HV_MEMORY_READ | HV_MEMORY_EXEC);
- /* stop tracking region*/
- } else {
- slot->flags &= ~HVF_SLOT_LOG;
- hv_vm_protect((uintptr_t)slot->start, (size_t)slot->size,
- HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC);
- }
-}
-
-static void hvf_log_start(MemoryListener *listener,
- MemoryRegionSection *section, int old, int new)
-{
- if (old != 0) {
- return;
- }
-
- hvf_set_dirty_tracking(section, 1);
-}
-
-static void hvf_log_stop(MemoryListener *listener,
- MemoryRegionSection *section, int old, int new)
-{
- if (new != 0) {
- return;
- }
-
- hvf_set_dirty_tracking(section, 0);
-}
-
-static void hvf_log_sync(MemoryListener *listener,
- MemoryRegionSection *section)
-{
- /*
- * sync of dirty pages is handled elsewhere; just make sure we keep
- * tracking the region.
- */
- hvf_set_dirty_tracking(section, 1);
-}
-
-static void hvf_region_add(MemoryListener *listener,
- MemoryRegionSection *section)
-{
- hvf_set_phys_mem(section, true);
-}
-
-static void hvf_region_del(MemoryListener *listener,
- MemoryRegionSection *section)
-{
- hvf_set_phys_mem(section, false);
-}
-
-static MemoryListener hvf_memory_listener = {
- .name = "hvf",
- .priority = MEMORY_LISTENER_PRIORITY_ACCEL,
- .region_add = hvf_region_add,
- .region_del = hvf_region_del,
- .log_start = hvf_log_start,
- .log_stop = hvf_log_stop,
- .log_sync = hvf_log_sync,
-};
-
static void dummy_signal(int sig)
{
}
-bool hvf_allowed;
-
-static int hvf_accel_init(MachineState *ms)
+static void do_hvf_get_vcpu_exec_time(CPUState *cpu, run_on_cpu_data arg)
{
- int x;
- hv_return_t ret;
- HVFState *s;
- int pa_range = 36;
- MachineClass *mc = MACHINE_GET_CLASS(ms);
-
- if (mc->hvf_get_physical_address_range) {
- pa_range = mc->hvf_get_physical_address_range(ms);
- if (pa_range < 0) {
- return -EINVAL;
- }
- }
-
- ret = hvf_arch_vm_create(ms, (uint32_t)pa_range);
- assert_hvf_ok(ret);
-
- s = g_new0(HVFState, 1);
-
- s->num_slots = ARRAY_SIZE(s->slots);
- for (x = 0; x < s->num_slots; ++x) {
- s->slots[x].size = 0;
- s->slots[x].slot_id = x;
- }
-
- QTAILQ_INIT(&s->hvf_sw_breakpoints);
-
- hvf_state = s;
- memory_listener_register(&hvf_memory_listener, &address_space_memory);
-
- return hvf_arch_init();
-}
-
-static inline int hvf_gdbstub_sstep_flags(void)
-{
- return SSTEP_ENABLE | SSTEP_NOIRQ;
-}
-
-static void hvf_accel_class_init(ObjectClass *oc, void *data)
-{
- AccelClass *ac = ACCEL_CLASS(oc);
- ac->name = "HVF";
- ac->init_machine = hvf_accel_init;
- ac->allowed = &hvf_allowed;
- ac->gdbstub_supported_sstep_flags = hvf_gdbstub_sstep_flags;
-}
-
-static const TypeInfo hvf_accel_type = {
- .name = TYPE_HVF_ACCEL,
- .parent = TYPE_ACCEL,
- .class_init = hvf_accel_class_init,
-};
-
-static void hvf_type_init(void)
-{
- type_register_static(&hvf_accel_type);
+ int r = hv_vcpu_get_exec_time(cpu->accel->fd, arg.host_ptr);
+ assert_hvf_ok(r);
}
-type_init(hvf_type_init);
-
static void hvf_vcpu_destroy(CPUState *cpu)
{
hv_return_t ret = hv_vcpu_destroy(cpu->accel->fd);
@@ -409,8 +157,8 @@ static int hvf_init_vcpu(CPUState *cpu)
#else
r = hv_vcpu_create(&cpu->accel->fd, HV_VCPU_DEFAULT);
#endif
- cpu->accel->dirty = true;
assert_hvf_ok(r);
+ cpu->vcpu_dirty = true;
cpu->accel->guest_debug_enabled = false;
@@ -444,13 +192,13 @@ static void *hvf_cpu_thread_fn(void *arg)
qemu_guest_random_seed_thread_part2(cpu->random_seed);
do {
+ qemu_process_cpu_events(cpu);
if (cpu_can_run(cpu)) {
r = hvf_vcpu_exec(cpu);
if (r == EXCP_DEBUG) {
cpu_handle_guest_debug(cpu);
}
}
- qemu_wait_io_event(cpu);
} while (!cpu->unplug || cpu_can_run(cpu));
hvf_vcpu_destroy(cpu);
@@ -476,6 +224,34 @@ static void hvf_start_vcpu_thread(CPUState *cpu)
cpu, QEMU_THREAD_JOINABLE);
}
+struct hvf_sw_breakpoint *hvf_find_sw_breakpoint(CPUState *cpu, vaddr pc)
+{
+ struct hvf_sw_breakpoint *bp;
+
+ QTAILQ_FOREACH(bp, &hvf_state->hvf_sw_breakpoints, entry) {
+ if (bp->pc == pc) {
+ return bp;
+ }
+ }
+ return NULL;
+}
+
+int hvf_sw_breakpoints_active(CPUState *cpu)
+{
+ return !QTAILQ_EMPTY(&hvf_state->hvf_sw_breakpoints);
+}
+
+static void do_hvf_update_guest_debug(CPUState *cpu, run_on_cpu_data arg)
+{
+ hvf_arch_update_guest_debug(cpu);
+}
+
+int hvf_update_guest_debug(CPUState *cpu)
+{
+ run_on_cpu(cpu, do_hvf_update_guest_debug, RUN_ON_CPU_NULL);
+ return 0;
+}
+
static int hvf_insert_breakpoint(CPUState *cpu, int type, vaddr addr, vaddr len)
{
struct hvf_sw_breakpoint *bp;
@@ -578,12 +354,28 @@ static void hvf_remove_all_breakpoints(CPUState *cpu)
}
}
-static void hvf_accel_ops_class_init(ObjectClass *oc, void *data)
+static void hvf_get_vcpu_stats(CPUState *cpu, GString *buf)
+{
+ uint64_t time_mach; /* units of mach_absolute_time() */
+
+ run_on_cpu(cpu, do_hvf_get_vcpu_exec_time, RUN_ON_CPU_HOST_PTR(&time_mach));
+
+ mach_timebase_info_data_t timebase;
+ mach_timebase_info(&timebase);
+ uint64_t time_ns = time_mach * timebase.numer / timebase.denom;
+
+ g_string_append_printf(buf, "HVF cumulative execution time: %llu.%.3llus\n",
+ time_ns / 1000000000,
+ (time_ns % 1000000000) / 1000000);
+}
+
+static void hvf_accel_ops_class_init(ObjectClass *oc, const void *data)
{
AccelOpsClass *ops = ACCEL_OPS_CLASS(oc);
ops->create_vcpu_thread = hvf_start_vcpu_thread;
ops->kick_vcpu_thread = hvf_kick_vcpu_thread;
+ ops->handle_interrupt = generic_handle_interrupt;
ops->synchronize_post_reset = hvf_cpu_synchronize_post_reset;
ops->synchronize_post_init = hvf_cpu_synchronize_post_init;
@@ -595,7 +387,10 @@ static void hvf_accel_ops_class_init(ObjectClass *oc, void *data)
ops->remove_all_breakpoints = hvf_remove_all_breakpoints;
ops->update_guest_debug = hvf_update_guest_debug;
ops->supports_guest_debug = hvf_arch_supports_guest_debug;
+
+ ops->get_vcpu_stats = hvf_get_vcpu_stats;
};
+
static const TypeInfo hvf_accel_ops_type = {
.name = ACCEL_OPS_NAME("hvf"),
@@ -603,8 +398,10 @@ static const TypeInfo hvf_accel_ops_type = {
.class_init = hvf_accel_ops_class_init,
.abstract = true,
};
+
static void hvf_accel_ops_register_types(void)
{
type_register_static(&hvf_accel_ops_type);
}
+
type_init(hvf_accel_ops_register_types);
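hv_vcpu_get_exec_time() reports in mach_absolute_time() units, so the timebase scaling in hvf_get_vcpu_stats() above is what yields nanoseconds. The conversion in isolation (on Apple silicon the timebase is typically 125/3, i.e. a 24 MHz tick):

    #include <mach/mach_time.h>

    static uint64_t mach_ticks_to_ns(uint64_t ticks)
    {
        mach_timebase_info_data_t tb;

        mach_timebase_info(&tb);             /* e.g. numer = 125, denom = 3 */
        return ticks * tb.numer / tb.denom;  /* truncates; fine for stats */
    }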
diff --git a/accel/hvf/hvf-all.c b/accel/hvf/hvf-all.c
index d404e01..0a4b498 100644
--- a/accel/hvf/hvf-all.c
+++ b/accel/hvf/hvf-all.c
@@ -10,8 +10,25 @@
#include "qemu/osdep.h"
#include "qemu/error-report.h"
+#include "accel/accel-ops.h"
+#include "system/address-spaces.h"
+#include "system/memory.h"
#include "system/hvf.h"
#include "system/hvf_int.h"
+#include "hw/core/cpu.h"
+#include "hw/boards.h"
+#include "trace.h"
+
+bool hvf_allowed;
+
+struct mac_slot {
+ int present;
+ uint64_t size;
+ uint64_t gpa_start;
+ uint64_t gva;
+};
+
+struct mac_slot mac_slots[32];
const char *hvf_return_string(hv_return_t ret)
{
@@ -41,25 +58,257 @@ void assert_hvf_ok_impl(hv_return_t ret, const char *file, unsigned int line,
abort();
}
-struct hvf_sw_breakpoint *hvf_find_sw_breakpoint(CPUState *cpu, vaddr pc)
+static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags)
{
- struct hvf_sw_breakpoint *bp;
+ struct mac_slot *macslot;
+ hv_return_t ret;
+
+ macslot = &mac_slots[slot->slot_id];
+
+ if (macslot->present) {
+ if (macslot->size != slot->size) {
+ macslot->present = 0;
+ trace_hvf_vm_unmap(macslot->gpa_start, macslot->size);
+ ret = hv_vm_unmap(macslot->gpa_start, macslot->size);
+ assert_hvf_ok(ret);
+ }
+ }
+
+ if (!slot->size) {
+ return 0;
+ }
+
+ macslot->present = 1;
+ macslot->gpa_start = slot->start;
+ macslot->size = slot->size;
+ trace_hvf_vm_map(slot->start, slot->size, slot->mem, flags,
+ flags & HV_MEMORY_READ ? 'R' : '-',
+ flags & HV_MEMORY_WRITE ? 'W' : '-',
+ flags & HV_MEMORY_EXEC ? 'X' : '-');
+ ret = hv_vm_map(slot->mem, slot->start, slot->size, flags);
+ assert_hvf_ok(ret);
+ return 0;
+}
+
+static void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
+{
+ hvf_slot *mem;
+ MemoryRegion *area = section->mr;
+ bool writable = !area->readonly && !area->rom_device;
+ hv_memory_flags_t flags;
+ uint64_t page_size = qemu_real_host_page_size();
+
+ if (!memory_region_is_ram(area)) {
+ if (writable) {
+ return;
+ } else if (!memory_region_is_romd(area)) {
+ /*
+ * If the memory device is not in romd_mode, then we actually want
+ * to remove the hvf memory slot so all accesses will trap.
+ */
+ add = false;
+ }
+ }
+
+ if (!QEMU_IS_ALIGNED(int128_get64(section->size), page_size) ||
+ !QEMU_IS_ALIGNED(section->offset_within_address_space, page_size)) {
+ /* Not page aligned, so we can not map as RAM */
+ add = false;
+ }
+
+ mem = hvf_find_overlap_slot(
+ section->offset_within_address_space,
+ int128_get64(section->size));
+
+ if (mem && add) {
+ if (mem->size == int128_get64(section->size) &&
+ mem->start == section->offset_within_address_space &&
+ mem->mem == (memory_region_get_ram_ptr(area) +
+ section->offset_within_region)) {
+ return; /* Same region was attempted to register, go away. */
+ }
+ }
+
+ /* Region needs to be reset. set the size to 0 and remap it. */
+ if (mem) {
+ mem->size = 0;
+ if (do_hvf_set_memory(mem, 0)) {
+ error_report("Failed to reset overlapping slot");
+ abort();
+ }
+ }
+
+ if (!add) {
+ return;
+ }
+
+ if (area->readonly ||
+ (!memory_region_is_ram(area) && memory_region_is_romd(area))) {
+ flags = HV_MEMORY_READ | HV_MEMORY_EXEC;
+ } else {
+ flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC;
+ }
+
+ /* Now make a new slot. */
+ int x;
- QTAILQ_FOREACH(bp, &hvf_state->hvf_sw_breakpoints, entry) {
- if (bp->pc == pc) {
- return bp;
+ for (x = 0; x < hvf_state->num_slots; ++x) {
+ mem = &hvf_state->slots[x];
+ if (!mem->size) {
+ break;
}
}
- return NULL;
+
+ if (x == hvf_state->num_slots) {
+ error_report("No free slots");
+ abort();
+ }
+
+ mem->size = int128_get64(section->size);
+ mem->mem = memory_region_get_ram_ptr(area) + section->offset_within_region;
+ mem->start = section->offset_within_address_space;
+ mem->region = area;
+
+ if (do_hvf_set_memory(mem, flags)) {
+ error_report("Error registering new memory slot");
+ abort();
+ }
}
-int hvf_sw_breakpoints_active(CPUState *cpu)
+static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on)
{
- return !QTAILQ_EMPTY(&hvf_state->hvf_sw_breakpoints);
+ hvf_slot *slot;
+
+ slot = hvf_find_overlap_slot(
+ section->offset_within_address_space,
+ int128_get64(section->size));
+
+ /* protect region against writes; begin tracking it */
+ if (on) {
+ slot->flags |= HVF_SLOT_LOG;
+ hv_vm_protect((uintptr_t)slot->start, (size_t)slot->size,
+ HV_MEMORY_READ | HV_MEMORY_EXEC);
+ /* stop tracking region*/
+ } else {
+ slot->flags &= ~HVF_SLOT_LOG;
+ hv_vm_protect((uintptr_t)slot->start, (size_t)slot->size,
+ HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC);
+ }
}
-int hvf_update_guest_debug(CPUState *cpu)
+static void hvf_log_start(MemoryListener *listener,
+ MemoryRegionSection *section, int old, int new)
{
- hvf_arch_update_guest_debug(cpu);
- return 0;
+ if (old != 0) {
+ return;
+ }
+
+ hvf_set_dirty_tracking(section, 1);
+}
+
+static void hvf_log_stop(MemoryListener *listener,
+ MemoryRegionSection *section, int old, int new)
+{
+ if (new != 0) {
+ return;
+ }
+
+ hvf_set_dirty_tracking(section, 0);
+}
+
+static void hvf_log_sync(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ /*
+ * sync of dirty pages is handled elsewhere; just make sure we keep
+ * tracking the region.
+ */
+ hvf_set_dirty_tracking(section, 1);
+}
+
+static void hvf_region_add(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ hvf_set_phys_mem(section, true);
+}
+
+static void hvf_region_del(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ hvf_set_phys_mem(section, false);
+}
+
+static MemoryListener hvf_memory_listener = {
+ .name = "hvf",
+ .priority = MEMORY_LISTENER_PRIORITY_ACCEL,
+ .region_add = hvf_region_add,
+ .region_del = hvf_region_del,
+ .log_start = hvf_log_start,
+ .log_stop = hvf_log_stop,
+ .log_sync = hvf_log_sync,
+};
+
+static int hvf_accel_init(AccelState *as, MachineState *ms)
+{
+ int x;
+ hv_return_t ret;
+ HVFState *s = HVF_STATE(as);
+ int pa_range = 36;
+ MachineClass *mc = MACHINE_GET_CLASS(ms);
+
+ if (mc->hvf_get_physical_address_range) {
+ pa_range = mc->hvf_get_physical_address_range(ms);
+ if (pa_range < 0) {
+ return -EINVAL;
+ }
+ }
+
+ ret = hvf_arch_vm_create(ms, (uint32_t)pa_range);
+ if (ret == HV_DENIED) {
+ error_report("Could not access HVF. Is the executable signed"
+ " with com.apple.security.hypervisor entitlement?");
+ exit(1);
+ }
+ assert_hvf_ok(ret);
+
+ s->num_slots = ARRAY_SIZE(s->slots);
+ for (x = 0; x < s->num_slots; ++x) {
+ s->slots[x].size = 0;
+ s->slots[x].slot_id = x;
+ }
+
+ QTAILQ_INIT(&s->hvf_sw_breakpoints);
+
+ hvf_state = s;
+ memory_listener_register(&hvf_memory_listener, &address_space_memory);
+
+ return hvf_arch_init();
}
+
+static int hvf_gdbstub_sstep_flags(AccelState *as)
+{
+ return SSTEP_ENABLE | SSTEP_NOIRQ;
+}
+
+static void hvf_accel_class_init(ObjectClass *oc, const void *data)
+{
+ AccelClass *ac = ACCEL_CLASS(oc);
+ ac->name = "HVF";
+ ac->init_machine = hvf_accel_init;
+ ac->allowed = &hvf_allowed;
+ ac->gdbstub_supported_sstep_flags = hvf_gdbstub_sstep_flags;
+}
+
+static const TypeInfo hvf_accel_type = {
+ .name = TYPE_HVF_ACCEL,
+ .parent = TYPE_ACCEL,
+ .instance_size = sizeof(HVFState),
+ .class_init = hvf_accel_class_init,
+};
+
+static void hvf_type_init(void)
+{
+ type_register_static(&hvf_accel_type);
+}
+
+type_init(hvf_type_init);
diff --git a/accel/hvf/trace-events b/accel/hvf/trace-events
new file mode 100644
index 0000000..2fd3e12
--- /dev/null
+++ b/accel/hvf/trace-events
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# See docs/devel/tracing.rst for syntax documentation.
+
+# hvf-all.c
+hvf_vm_map(uint64_t paddr, uint64_t size, void *vaddr, uint8_t flags, const char r, const char w, const char e) "paddr:0x%016"PRIx64" size:0x%08"PRIx64" vaddr:%p flags:0x%02x/%c%c%c"
+hvf_vm_unmap(uint64_t paddr, uint64_t size) "paddr:0x%016"PRIx64" size:0x%08"PRIx64
diff --git a/accel/hvf/trace.h b/accel/hvf/trace.h
new file mode 100644
index 0000000..83a1883
--- /dev/null
+++ b/accel/hvf/trace.h
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#include "trace/trace-accel_hvf.h"
diff --git a/accel/kvm/kvm-accel-ops.c b/accel/kvm/kvm-accel-ops.c
index 54ea609..8ed6945 100644
--- a/accel/kvm/kvm-accel-ops.c
+++ b/accel/kvm/kvm-accel-ops.c
@@ -16,7 +16,7 @@
#include "qemu/osdep.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
-#include "system/accel-ops.h"
+#include "accel/accel-cpu-ops.h"
#include "system/kvm.h"
#include "system/kvm_int.h"
#include "system/runstate.h"
@@ -47,13 +47,14 @@ static void *kvm_vcpu_thread_fn(void *arg)
qemu_guest_random_seed_thread_part2(cpu->random_seed);
do {
+ qemu_process_cpu_events(cpu);
+
if (cpu_can_run(cpu)) {
r = kvm_cpu_exec(cpu);
if (r == EXCP_DEBUG) {
cpu_handle_guest_debug(cpu);
}
}
- qemu_wait_io_event(cpu);
} while (!cpu->unplug || cpu_can_run(cpu));
kvm_destroy_vcpu(cpu);
@@ -90,7 +91,7 @@ static int kvm_update_guest_debug_ops(CPUState *cpu)
}
#endif
-static void kvm_accel_ops_class_init(ObjectClass *oc, void *data)
+static void kvm_accel_ops_class_init(ObjectClass *oc, const void *data)
{
AccelOpsClass *ops = ACCEL_OPS_CLASS(oc);
@@ -101,6 +102,7 @@ static void kvm_accel_ops_class_init(ObjectClass *oc, void *data)
ops->synchronize_post_init = kvm_cpu_synchronize_post_init;
ops->synchronize_state = kvm_cpu_synchronize_state;
ops->synchronize_pre_loadvm = kvm_cpu_synchronize_pre_loadvm;
+ ops->handle_interrupt = generic_handle_interrupt;
#ifdef TARGET_KVM_HAVE_GUEST_DEBUG
ops->update_guest_debug = kvm_update_guest_debug_ops;
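handle_interrupt is now assigned explicitly rather than left NULL for a generic fallback. A sketch of what generic_handle_interrupt amounts to, based on the long-standing default behaviour in system/cpus.c:

    void generic_handle_interrupt(CPUState *cpu, int mask)
    {
        cpu->interrupt_request |= mask;

        if (!qemu_cpu_is_self(cpu)) {
            qemu_cpu_kick(cpu);    /* wake the vCPU thread so it notices */
        }
    }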
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index f89568b..5603192 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -32,9 +32,13 @@
#include "system/runstate.h"
#include "system/cpus.h"
#include "system/accel-blocker.h"
+#include "system/physmem.h"
+#include "system/ramblock.h"
+#include "accel/accel-ops.h"
#include "qemu/bswap.h"
-#include "exec/memory.h"
-#include "exec/ram_addr.h"
+#include "exec/tswap.h"
+#include "exec/target_page.h"
+#include "system/memory.h"
#include "qemu/event_notifier.h"
#include "qemu/main-loop.h"
#include "trace.h"
@@ -57,6 +61,11 @@
#include <sys/eventfd.h>
#endif
+#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
+# define KVM_HAVE_MCE_INJECTION 1
+#endif
+
+
/* KVM uses PAGE_SIZE in its definition of KVM_COALESCED_MMIO_MAX. We
* need to use the real host PAGE_SIZE, as that's what KVM will use.
*/
@@ -93,6 +102,7 @@ bool kvm_allowed;
bool kvm_readonly_mem_allowed;
bool kvm_vm_attributes_allowed;
bool kvm_msi_use_devid;
+bool kvm_pre_fault_memory_supported;
static bool kvm_has_guest_debug;
static int kvm_sstep_flags;
static bool kvm_immediate_exit;
@@ -350,7 +360,7 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram,
static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot, bool new)
{
KVMState *s = kvm_state;
- struct kvm_userspace_memory_region2 mem;
+ struct kvm_userspace_memory_region2 mem = {};
int ret;
mem.slot = slot->slot | (kml->as_id << 16);
@@ -406,7 +416,7 @@ err:
return ret;
}
-void kvm_park_vcpu(CPUState *cpu)
+static void kvm_park_vcpu(CPUState *cpu)
{
struct KVMParkedVcpu *vcpu;
@@ -418,7 +428,7 @@ void kvm_park_vcpu(CPUState *cpu)
QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
}
-int kvm_unpark_vcpu(KVMState *s, unsigned long vcpu_id)
+static int kvm_unpark_vcpu(KVMState *s, unsigned long vcpu_id)
{
struct KVMParkedVcpu *cpu;
int kvm_fd = -ENOENT;
@@ -437,9 +447,8 @@ int kvm_unpark_vcpu(KVMState *s, unsigned long vcpu_id)
return kvm_fd;
}
-static void kvm_reset_parked_vcpus(void *param)
+static void kvm_reset_parked_vcpus(KVMState *s)
{
- KVMState *s = param;
struct KVMParkedVcpu *cpu;
QLIST_FOREACH(cpu, &s->kvm_parked_vcpus, node) {
@@ -447,7 +456,13 @@ static void kvm_reset_parked_vcpus(void *param)
}
}
-int kvm_create_vcpu(CPUState *cpu)
+/**
+ * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU
+ * @cpu: QOM CPUState object for which KVM vCPU has to be fetched/created.
+ *
+ * @returns: 0 on success, negative errno on failure.
+ */
+static int kvm_create_vcpu(CPUState *cpu)
{
unsigned long vcpu_id = kvm_arch_vcpu_id(cpu);
KVMState *s = kvm_state;
@@ -466,7 +481,9 @@ int kvm_create_vcpu(CPUState *cpu)
cpu->kvm_fd = kvm_fd;
cpu->kvm_state = s;
- cpu->vcpu_dirty = true;
+ if (!s->guest_state_protected) {
+ cpu->vcpu_dirty = true;
+ }
cpu->dirty_pages = 0;
cpu->throttle_us_per_full = 0;
@@ -507,16 +524,23 @@ static int do_kvm_destroy_vcpu(CPUState *cpu)
goto err;
}
+ /* If I am the CPU that created coalesced_mmio_ring, then discard it */
+ if (s->coalesced_mmio_ring == (void *)cpu->kvm_run + PAGE_SIZE) {
+ s->coalesced_mmio_ring = NULL;
+ }
+
ret = munmap(cpu->kvm_run, mmap_size);
if (ret < 0) {
goto err;
}
+ cpu->kvm_run = NULL;
if (cpu->kvm_dirty_gfns) {
ret = munmap(cpu->kvm_dirty_gfns, s->kvm_dirty_ring_bytes);
if (ret < 0) {
goto err;
}
+ cpu->kvm_dirty_gfns = NULL;
}
kvm_park_vcpu(cpu);
@@ -540,6 +564,11 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
+ ret = kvm_arch_pre_create_vcpu(cpu, errp);
+ if (ret < 0) {
+ goto err;
+ }
+
ret = kvm_create_vcpu(cpu);
if (ret < 0) {
error_setg_errno(errp, -ret,
@@ -595,6 +624,31 @@ err:
return ret;
}
+void kvm_close(void)
+{
+ CPUState *cpu;
+
+ if (!kvm_state || kvm_state->fd == -1) {
+ return;
+ }
+
+ CPU_FOREACH(cpu) {
+ cpu_remove_sync(cpu);
+ close(cpu->kvm_fd);
+ cpu->kvm_fd = -1;
+ close(cpu->kvm_vcpu_stats_fd);
+ cpu->kvm_vcpu_stats_fd = -1;
+ }
+
+ if (kvm_state && kvm_state->fd != -1) {
+ close(kvm_state->vmfd);
+ kvm_state->vmfd = -1;
+ close(kvm_state->fd);
+ kvm_state->fd = -1;
+ }
+ kvm_state = NULL;
+}
+
/*
* dirty pages logging control
*/
@@ -704,7 +758,7 @@ static void kvm_slot_sync_dirty_pages(KVMSlot *slot)
ram_addr_t start = slot->ram_start_offset;
ram_addr_t pages = slot->memory_size / qemu_real_host_page_size();
- cpu_physical_memory_set_dirty_lebitmap(slot->dirty_bmap, start, pages);
+ physical_memory_set_dirty_lebitmap(slot->dirty_bmap, start, pages);
}
static void kvm_slot_reset_dirty_pages(KVMSlot *slot)
@@ -1314,21 +1368,22 @@ bool kvm_hwpoisoned_mem(void)
static uint32_t adjust_ioeventfd_endianness(uint32_t val, uint32_t size)
{
-#if HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN
- /* The kernel expects ioeventfd values in HOST_BIG_ENDIAN
- * endianness, but the memory core hands them in target endianness.
- * For example, PPC is always treated as big-endian even if running
- * on KVM and on PPC64LE. Correct here.
- */
- switch (size) {
- case 2:
- val = bswap16(val);
- break;
- case 4:
- val = bswap32(val);
- break;
+ if (target_needs_bswap()) {
+ /*
+ * The kernel expects ioeventfd values in HOST_BIG_ENDIAN
+ * endianness, but the memory core hands them in target endianness.
+ * For example, PPC is always treated as big-endian even if running
+ * on KVM and on PPC64LE. Correct here, swapping back.
+ */
+ switch (size) {
+ case 2:
+ val = bswap16(val);
+ break;
+ case 4:
+ val = bswap32(val);
+ break;
+ }
}
-#endif
return val;
}
@@ -1542,7 +1597,8 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
mem->ram = ram;
mem->flags = kvm_mem_flags(mr);
mem->guest_memfd = mr->ram_block->guest_memfd;
- mem->guest_memfd_offset = (uint8_t*)ram - mr->ram_block->host;
+ mem->guest_memfd_offset = mem->guest_memfd >= 0 ?
+ (uint8_t*)ram - mr->ram_block->host : 0;
kvm_slot_init_dirty_bitmap(mem);
err = kvm_set_user_memory_region(kml, mem, true);
@@ -2420,7 +2476,7 @@ static int kvm_recommended_vcpus(KVMState *s)
static int kvm_max_vcpus(KVMState *s)
{
- int ret = kvm_check_extension(s, KVM_CAP_MAX_VCPUS);
+ int ret = kvm_vm_check_extension(s, KVM_CAP_MAX_VCPUS);
return (ret) ? ret : kvm_recommended_vcpus(s);
}
@@ -2450,13 +2506,10 @@ uint32_t kvm_dirty_ring_size(void)
return kvm_state->kvm_dirty_ring_size;
}
-static int do_kvm_create_vm(MachineState *ms, int type)
+static int do_kvm_create_vm(KVMState *s, int type)
{
- KVMState *s;
int ret;
- s = KVM_STATE(ms->accelerator);
-
do {
ret = kvm_ioctl(s, KVM_CREATE_VM, type);
} while (ret == -EINTR);
@@ -2553,12 +2606,11 @@ static int kvm_setup_dirty_ring(KVMState *s)
return 0;
}
-static int kvm_init(MachineState *ms)
+static int kvm_init(AccelState *as, MachineState *ms)
{
MachineClass *mc = MACHINE_GET_CLASS(ms);
static const char upgrade_note[] =
- "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
- "(see http://sourceforge.net/projects/kvm).\n";
+ "Please upgrade to at least kernel 4.5.\n";
const struct {
const char *name;
int num;
@@ -2568,15 +2620,13 @@ static int kvm_init(MachineState *ms)
{ /* end of list */ }
}, *nc = num_cpus;
int soft_vcpus_limit, hard_vcpus_limit;
- KVMState *s;
+ KVMState *s = KVM_STATE(as);
const KVMCapabilityInfo *missing_cap;
int ret;
int type;
qemu_mutex_init(&kml_slots_lock);
- s = KVM_STATE(ms->accelerator);
-
/*
* On systems where the kernel can support different base page
* sizes, host page size may be different from TARGET_PAGE_SIZE,
@@ -2628,7 +2678,7 @@ static int kvm_init(MachineState *ms)
goto err;
}
- ret = do_kvm_create_vm(ms, type);
+ ret = do_kvm_create_vm(s, type);
if (ret < 0) {
goto err;
}
@@ -2729,16 +2779,16 @@ static int kvm_init(MachineState *ms)
kvm_supported_memory_attributes = kvm_vm_check_extension(s, KVM_CAP_MEMORY_ATTRIBUTES);
kvm_guest_memfd_supported =
- kvm_check_extension(s, KVM_CAP_GUEST_MEMFD) &&
- kvm_check_extension(s, KVM_CAP_USER_MEMORY2) &&
+ kvm_vm_check_extension(s, KVM_CAP_GUEST_MEMFD) &&
+ kvm_vm_check_extension(s, KVM_CAP_USER_MEMORY2) &&
(kvm_supported_memory_attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE);
+ kvm_pre_fault_memory_supported = kvm_vm_check_extension(s, KVM_CAP_PRE_FAULT_MEMORY);
if (s->kernel_irqchip_split == ON_OFF_AUTO_AUTO) {
s->kernel_irqchip_split = mc->default_kernel_irqchip_split ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
}
qemu_register_reset(kvm_unpoison_all, NULL);
- qemu_register_reset(kvm_reset_parked_vcpus, s);
if (s->kernel_irqchip_allowed) {
kvm_irqchip_create(s);
@@ -2887,44 +2937,48 @@ void kvm_cpu_synchronize_state(CPUState *cpu)
}
}
-static void do_kvm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
+static bool kvm_cpu_synchronize_put(CPUState *cpu, KvmPutState state,
+ const char *desc)
{
Error *err = NULL;
- int ret = kvm_arch_put_registers(cpu, KVM_PUT_RESET_STATE, &err);
+ int ret = kvm_arch_put_registers(cpu, state, &err);
if (ret) {
if (err) {
- error_reportf_err(err, "Restoring resisters after reset: ");
+ error_reportf_err(err, "Restoring resisters %s: ", desc);
} else {
- error_report("Failed to put registers after reset: %s",
+ error_report("Failed to put registers %s: %s", desc,
strerror(-ret));
}
- cpu_dump_state(cpu, stderr, CPU_DUMP_CODE);
- vm_stop(RUN_STATE_INTERNAL_ERROR);
+ return false;
}
cpu->vcpu_dirty = false;
+
+ return true;
+}
+
+static void do_kvm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
+{
+ if (!kvm_cpu_synchronize_put(cpu, KVM_PUT_RESET_STATE, "after reset")) {
+ cpu_dump_state(cpu, stderr, CPU_DUMP_CODE);
+ vm_stop(RUN_STATE_INTERNAL_ERROR);
+ }
}
void kvm_cpu_synchronize_post_reset(CPUState *cpu)
{
run_on_cpu(cpu, do_kvm_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
+
+ if (cpu == first_cpu) {
+ kvm_reset_parked_vcpus(kvm_state);
+ }
}
static void do_kvm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
{
- Error *err = NULL;
- int ret = kvm_arch_put_registers(cpu, KVM_PUT_FULL_STATE, &err);
- if (ret) {
- if (err) {
- error_reportf_err(err, "Putting registers after init: ");
- } else {
- error_report("Failed to put registers after init: %s",
- strerror(-ret));
- }
+ if (!kvm_cpu_synchronize_put(cpu, KVM_PUT_FULL_STATE, "after init")) {
exit(1);
}
-
- cpu->vcpu_dirty = false;
}
void kvm_cpu_synchronize_post_init(CPUState *cpu)
@@ -2978,10 +3032,6 @@ static void kvm_eat_signals(CPUState *cpu)
if (kvm_immediate_exit) {
qatomic_set(&cpu->kvm_run->immediate_exit, 0);
- /* Write kvm_run->immediate_exit before the cpu->exit_request
- * write in kvm_cpu_exec.
- */
- smp_wmb();
return;
}
@@ -3073,6 +3123,15 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private)
addr = memory_region_get_ram_ptr(mr) + section.offset_within_region;
rb = qemu_ram_block_from_host(addr, false, &offset);
+ ret = ram_block_attributes_state_change(RAM_BLOCK_ATTRIBUTES(mr->rdm),
+ offset, size, to_private);
+ if (ret) {
+ error_report("Failed to notify the listener the state change of "
+ "(0x%"HWADDR_PRIx" + 0x%"HWADDR_PRIx") to %s",
+ start, size, to_private ? "private" : "shared");
+ goto out_unref;
+ }
+
if (to_private) {
if (rb->page_size != qemu_real_host_page_size()) {
/*
@@ -3099,7 +3158,6 @@ int kvm_cpu_exec(CPUState *cpu)
trace_kvm_cpu_exec();
if (kvm_arch_process_async_events(cpu)) {
- qatomic_set(&cpu->exit_request, 0);
return EXCP_HLT;
}
@@ -3110,24 +3168,16 @@ int kvm_cpu_exec(CPUState *cpu)
MemTxAttrs attrs;
if (cpu->vcpu_dirty) {
- Error *err = NULL;
- ret = kvm_arch_put_registers(cpu, KVM_PUT_RUNTIME_STATE, &err);
- if (ret) {
- if (err) {
- error_reportf_err(err, "Putting registers after init: ");
- } else {
- error_report("Failed to put registers after init: %s",
- strerror(-ret));
- }
+ if (!kvm_cpu_synchronize_put(cpu, KVM_PUT_RUNTIME_STATE,
+ "at runtime")) {
ret = -1;
break;
}
-
- cpu->vcpu_dirty = false;
}
kvm_arch_pre_run(cpu, run);
- if (qatomic_read(&cpu->exit_request)) {
+ /* Corresponding store-release is in cpu_exit. */
+ if (qatomic_load_acquire(&cpu->exit_request)) {
trace_kvm_interrupt_exit_request();
/*
* KVM requires us to reenter the kernel after IO exits to complete
@@ -3137,13 +3187,15 @@ int kvm_cpu_exec(CPUState *cpu)
kvm_cpu_kick_self();
}
- /* Read cpu->exit_request before KVM_RUN reads run->immediate_exit.
- * Matching barrier in kvm_eat_signals.
- */
- smp_rmb();
-
run_ret = kvm_vcpu_ioctl(cpu, KVM_RUN, 0);
+ /*
+ * After writing cpu->exit_request, cpu_exit() sends a signal that writes
+ * kvm->run->immediate_exit. Because the signal is delivered after the
+ * write to cpu->exit_request, if KVM reads kvm->run->immediate_exit
+ * as true, cpu->exit_request will always read as true as well.
+ */
+
attrs = kvm_arch_post_run(cpu, run);
#ifdef KVM_HAVE_MCE_INJECTION
@@ -3286,7 +3338,6 @@ int kvm_cpu_exec(CPUState *cpu)
vm_stop(RUN_STATE_INTERNAL_ERROR);
}
- qatomic_set(&cpu->exit_request, 0);
return ret;
}
@@ -3671,7 +3722,7 @@ int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
have_sigbus_pending = true;
pending_sigbus_addr = addr;
pending_sigbus_code = code;
- qatomic_set(&cpu->exit_request, 1);
+ qatomic_set(&cpu->exit_request, true);
return 0;
#else
return 1;
@@ -3758,10 +3809,10 @@ int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target)
return r;
}
-static bool kvm_accel_has_memory(MachineState *ms, AddressSpace *as,
+static bool kvm_accel_has_memory(AccelState *accel, AddressSpace *as,
hwaddr start_addr, hwaddr size)
{
- KVMState *kvm = KVM_STATE(ms->accelerator);
+ KVMState *kvm = KVM_STATE(accel);
int i;
for (i = 0; i < kvm->nr_as; ++i) {
@@ -3952,12 +4003,12 @@ static void kvm_accel_instance_init(Object *obj)
* Returns: SSTEP_* flags that KVM supports for guest debug. The
* support is probed during kvm_init()
*/
-static int kvm_gdbstub_sstep_flags(void)
+static int kvm_gdbstub_sstep_flags(AccelState *as)
{
return kvm_sstep_flags;
}
-static void kvm_accel_class_init(ObjectClass *oc, void *data)
+static void kvm_accel_class_init(ObjectClass *oc, const void *data)
{
AccelClass *ac = ACCEL_CLASS(oc);
ac->name = "KVM";
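The exit_request changes above drop the explicit smp_rmb()/smp_wmb() pairing in favour of acquire/release ordering. A minimal C11 analogue of the contract the new comments describe (assuming QEMU's qatomic_load_acquire and the store-release in cpu_exit() map onto these orderings, as they do on supported hosts):

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_bool exit_request;

    void requester(void)    /* the cpu_exit() side */
    {
        atomic_store_explicit(&exit_request, true, memory_order_release);
        /* ... the signal then sets run->immediate_exit, so any vCPU    */
        /* that observes immediate_exit must also observe exit_request. */
    }

    bool vcpu_poll(void)    /* the kvm_cpu_exec() side, before KVM_RUN */
    {
        return atomic_load_explicit(&exit_request, memory_order_acquire);
    }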
diff --git a/accel/meson.build b/accel/meson.build
index 5eaeb68..983dfd0 100644
--- a/accel/meson.build
+++ b/accel/meson.build
@@ -1,5 +1,6 @@
+common_ss.add(files('accel-common.c'))
specific_ss.add(files('accel-target.c'))
-system_ss.add(files('accel-system.c', 'accel-blocker.c'))
+system_ss.add(files('accel-system.c', 'accel-blocker.c', 'accel-qmp.c', 'accel-irq.c'))
user_ss.add(files('accel-user.c'))
subdir('tcg')
@@ -9,6 +10,7 @@ if have_system
subdir('kvm')
subdir('xen')
subdir('stubs')
+ subdir('mshv')
endif
# qtest
diff --git a/accel/mshv/irq.c b/accel/mshv/irq.c
new file mode 100644
index 0000000..adf8f33
--- /dev/null
+++ b/accel/mshv/irq.c
@@ -0,0 +1,399 @@
+/*
+ * QEMU MSHV support
+ *
+ * Copyright Microsoft, Corp. 2025
+ *
+ * Authors: Ziqiao Zhou <ziqiaozhou@microsoft.com>
+ * Magnus Kulke <magnuskulke@microsoft.com>
+ * Stanislav Kinsburskii <skinsburskii@microsoft.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "linux/mshv.h"
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "hw/hyperv/hvhdk_mini.h"
+#include "hw/hyperv/hvgdk_mini.h"
+#include "hw/intc/ioapic.h"
+#include "hw/pci/msi.h"
+#include "system/mshv.h"
+#include "system/mshv_int.h"
+#include "trace.h"
+#include <stdint.h>
+#include <sys/ioctl.h>
+
+#define MSHV_IRQFD_RESAMPLE_FLAG (1 << MSHV_IRQFD_BIT_RESAMPLE)
+#define MSHV_IRQFD_BIT_DEASSIGN_FLAG (1 << MSHV_IRQFD_BIT_DEASSIGN)
+
+static MshvMsiControl *msi_control;
+static QemuMutex msi_control_mutex;
+
+void mshv_init_msicontrol(void)
+{
+ qemu_mutex_init(&msi_control_mutex);
+ msi_control = g_new0(MshvMsiControl, 1);
+ msi_control->gsi_routes = g_hash_table_new(g_direct_hash, g_direct_equal);
+ msi_control->updated = false;
+}
+
+static int set_msi_routing(uint32_t gsi, uint64_t addr, uint32_t data)
+{
+ struct mshv_user_irq_entry *entry;
+ uint32_t high_addr = addr >> 32;
+ uint32_t low_addr = addr & 0xFFFFFFFF;
+ GHashTable *gsi_routes;
+
+ trace_mshv_set_msi_routing(gsi, addr, data);
+
+ if (gsi >= MSHV_MAX_MSI_ROUTES) {
+ error_report("gsi >= MSHV_MAX_MSI_ROUTES");
+ return -1;
+ }
+
+ assert(msi_control);
+
+ WITH_QEMU_LOCK_GUARD(&msi_control_mutex) {
+ gsi_routes = msi_control->gsi_routes;
+ entry = g_hash_table_lookup(gsi_routes, GINT_TO_POINTER(gsi));
+
+ if (entry
+ && entry->address_hi == high_addr
+ && entry->address_lo == low_addr
+ && entry->data == data)
+ {
+ /* nothing to update */
+ return 0;
+ }
+
+ /* free old entry */
+ g_free(entry);
+
+ /* create new entry */
+ entry = g_new0(struct mshv_user_irq_entry, 1);
+ entry->gsi = gsi;
+ entry->address_hi = high_addr;
+ entry->address_lo = low_addr;
+ entry->data = data;
+
+ g_hash_table_insert(gsi_routes, GINT_TO_POINTER(gsi), entry);
+ msi_control->updated = true;
+ }
+
+ return 0;
+}
+
+static int add_msi_routing(uint64_t addr, uint32_t data)
+{
+ struct mshv_user_irq_entry *route_entry;
+ uint32_t high_addr = addr >> 32;
+ uint32_t low_addr = addr & 0xFFFFFFFF;
+ int gsi;
+ GHashTable *gsi_routes;
+
+ trace_mshv_add_msi_routing(addr, data);
+
+ assert(msi_control);
+
+ WITH_QEMU_LOCK_GUARD(&msi_control_mutex) {
+ /* find an empty slot */
+ gsi = 0;
+ gsi_routes = msi_control->gsi_routes;
+ while (gsi < MSHV_MAX_MSI_ROUTES) {
+ route_entry = g_hash_table_lookup(gsi_routes, GINT_TO_POINTER(gsi));
+ if (!route_entry) {
+ break;
+ }
+ gsi++;
+ }
+ if (gsi >= MSHV_MAX_MSI_ROUTES) {
+ error_report("No empty gsi slot available");
+ return -1;
+ }
+
+ /* create new entry */
+ route_entry = g_new0(struct mshv_user_irq_entry, 1);
+ route_entry->gsi = gsi;
+ route_entry->address_hi = high_addr;
+ route_entry->address_lo = low_addr;
+ route_entry->data = data;
+
+ g_hash_table_insert(gsi_routes, GINT_TO_POINTER(gsi), route_entry);
+ msi_control->updated = true;
+ }
+
+ return gsi;
+}
+
+static int commit_msi_routing_table(int vm_fd)
+{
+ guint len;
+ int i, ret;
+ size_t table_size;
+ struct mshv_user_irq_table *table;
+ GHashTableIter iter;
+ gpointer key, value;
+
+ assert(msi_control);
+
+ WITH_QEMU_LOCK_GUARD(&msi_control_mutex) {
+ if (!msi_control->updated) {
+ /* nothing to update */
+ return 0;
+ }
+
+ /* Calculate the size of the table */
+ len = g_hash_table_size(msi_control->gsi_routes);
+ table_size = sizeof(struct mshv_user_irq_table)
+ + len * sizeof(struct mshv_user_irq_entry);
+ table = g_malloc0(table_size);
+
+ g_hash_table_iter_init(&iter, msi_control->gsi_routes);
+ i = 0;
+ while (g_hash_table_iter_next(&iter, &key, &value)) {
+ struct mshv_user_irq_entry *entry = value;
+ table->entries[i] = *entry;
+ i++;
+ }
+ table->nr = i;
+
+ trace_mshv_commit_msi_routing_table(vm_fd, len);
+
+ ret = ioctl(vm_fd, MSHV_SET_MSI_ROUTING, table);
+ g_free(table);
+ if (ret < 0) {
+ error_report("Failed to commit msi routing table");
+ return -1;
+ }
+ msi_control->updated = false;
+ }
+ return 0;
+}
+
+static int remove_msi_routing(uint32_t gsi)
+{
+ struct mshv_user_irq_entry *route_entry;
+ GHashTable *gsi_routes;
+
+ trace_mshv_remove_msi_routing(gsi);
+
+ if (gsi >= MSHV_MAX_MSI_ROUTES) {
+ error_report("Invalid GSI: %u", gsi);
+ return -1;
+ }
+
+ assert(msi_control);
+
+ WITH_QEMU_LOCK_GUARD(&msi_control_mutex) {
+ gsi_routes = msi_control->gsi_routes;
+ route_entry = g_hash_table_lookup(gsi_routes, GINT_TO_POINTER(gsi));
+ if (route_entry) {
+ g_hash_table_remove(gsi_routes, GINT_TO_POINTER(gsi));
+ g_free(route_entry);
+ msi_control->updated = true;
+ }
+ }
+
+ return 0;
+}
+
+/* Pass an eventfd which is to be used for injecting interrupts from userland */
+static int irqfd(int vm_fd, int fd, int resample_fd, uint32_t gsi,
+ uint32_t flags)
+{
+ int ret;
+ struct mshv_user_irqfd arg = {
+ .fd = fd,
+ .resamplefd = resample_fd,
+ .gsi = gsi,
+ .flags = flags,
+ };
+
+ ret = ioctl(vm_fd, MSHV_IRQFD, &arg);
+ if (ret < 0) {
+ error_report("Failed to set irqfd: gsi=%u, fd=%d", gsi, fd);
+ return -1;
+ }
+ return ret;
+}
+
+static int register_irqfd(int vm_fd, int event_fd, uint32_t gsi)
+{
+ int ret;
+
+ trace_mshv_register_irqfd(vm_fd, event_fd, gsi);
+
+ ret = irqfd(vm_fd, event_fd, 0, gsi, 0);
+ if (ret < 0) {
+ error_report("Failed to register irqfd: gsi=%u", gsi);
+ return -1;
+ }
+ return 0;
+}
+
+static int register_irqfd_with_resample(int vm_fd, int event_fd,
+ int resample_fd, uint32_t gsi)
+{
+ int ret;
+ uint32_t flags = MSHV_IRQFD_RESAMPLE_FLAG;
+
+ ret = irqfd(vm_fd, event_fd, resample_fd, gsi, flags);
+ if (ret < 0) {
+ error_report("Failed to register irqfd with resample: gsi=%u", gsi);
+ return -1;
+ }
+ return 0;
+}
+
+static int unregister_irqfd(int vm_fd, int event_fd, uint32_t gsi)
+{
+ int ret;
+ uint32_t flags = MSHV_IRQFD_DEASSIGN_FLAG;
+
+ ret = irqfd(vm_fd, event_fd, 0, gsi, flags);
+ if (ret < 0) {
+ error_report("Failed to unregister irqfd: gsi=%u", gsi);
+ return -1;
+ }
+ return 0;
+}
+
+static int irqchip_update_irqfd_notifier_gsi(const EventNotifier *event,
+ const EventNotifier *resample,
+ int virq, bool add)
+{
+ int fd = event_notifier_get_fd(event);
+ int rfd = resample ? event_notifier_get_fd(resample) : -1;
+ int vm_fd = mshv_state->vm;
+
+ trace_mshv_irqchip_update_irqfd_notifier_gsi(fd, rfd, virq, add);
+
+ if (!add) {
+ return unregister_irqfd(vm_fd, fd, virq);
+ }
+
+ if (rfd >= 0) {
+ return register_irqfd_with_resample(vm_fd, fd, rfd, virq);
+ }
+
+ return register_irqfd(vm_fd, fd, virq);
+}
+
+int mshv_irqchip_add_msi_route(int vector, PCIDevice *dev)
+{
+ MSIMessage msg = { 0, 0 };
+ int virq = 0;
+
+ if (pci_available && dev) {
+ msg = pci_get_msi_message(dev, vector);
+ virq = add_msi_routing(msg.address, le32_to_cpu(msg.data));
+ }
+
+ return virq;
+}
+
+void mshv_irqchip_release_virq(int virq)
+{
+ remove_msi_routing(virq);
+}
+
+int mshv_irqchip_update_msi_route(int virq, MSIMessage msg, PCIDevice *dev)
+{
+ int ret;
+
+ ret = set_msi_routing(virq, msg.address, le32_to_cpu(msg.data));
+ if (ret < 0) {
+ error_report("Failed to set msi routing");
+ return -1;
+ }
+
+ return 0;
+}
+
+int mshv_request_interrupt(MshvState *mshv_state, uint32_t interrupt_type,
+ uint32_t vector, uint32_t vp_index,
+ bool logical_dest_mode, bool level_triggered)
+{
+ int ret;
+ int vm_fd = mshv_state->vm;
+
+ if (vector == 0) {
+ warn_report("Ignoring request for interrupt vector 0");
+ return 0;
+ }
+
+ union hv_interrupt_control control = {
+ .interrupt_type = interrupt_type,
+ .level_triggered = level_triggered,
+ .logical_dest_mode = logical_dest_mode,
+ .rsvd = 0,
+ };
+
+ struct hv_input_assert_virtual_interrupt arg = {0};
+ arg.control = control;
+ arg.dest_addr = (uint64_t)vp_index;
+ arg.vector = vector;
+
+ struct mshv_root_hvcall args = {0};
+ args.code = HVCALL_ASSERT_VIRTUAL_INTERRUPT;
+ args.in_sz = sizeof(arg);
+ args.in_ptr = (uint64_t)&arg;
+
+ ret = mshv_hvcall(vm_fd, &args);
+ if (ret < 0) {
+ error_report("Failed to request interrupt");
+ return -1;
+ }
+ return 0;
+}
+
+void mshv_irqchip_commit_routes(void)
+{
+ int ret;
+ int vm_fd = mshv_state->vm;
+
+ ret = commit_msi_routing_table(vm_fd);
+ if (ret < 0) {
+ error_report("Failed to commit msi routing table");
+ abort();
+ }
+}
+
+int mshv_irqchip_add_irqfd_notifier_gsi(const EventNotifier *event,
+ const EventNotifier *resample,
+ int virq)
+{
+ return irqchip_update_irqfd_notifier_gsi(event, resample, virq, true);
+}
+
+int mshv_irqchip_remove_irqfd_notifier_gsi(const EventNotifier *event,
+ int virq)
+{
+ return irqchip_update_irqfd_notifier_gsi(event, NULL, virq, false);
+}
+
+int mshv_reserve_ioapic_msi_routes(int vm_fd)
+{
+ int ret, gsi;
+
+ /*
+ * Reserve GSI 0-23 for IOAPIC pins, to avoid conflicts of legacy
+ * peripherals with MSI-X devices
+ */
+ for (gsi = 0; gsi < IOAPIC_NUM_PINS; gsi++) {
+ ret = add_msi_routing(0, 0);
+ if (ret < 0) {
+ error_report("Failed to reserve GSI %d", gsi);
+ return -1;
+ }
+ }
+
+ ret = commit_msi_routing_table(vm_fd);
+ if (ret < 0) {
+ error_report("Failed to commit reserved IOAPIC MSI routes");
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/accel/mshv/mem.c b/accel/mshv/mem.c
new file mode 100644
index 0000000..0e2164a
--- /dev/null
+++ b/accel/mshv/mem.c
@@ -0,0 +1,563 @@
+/*
+ * QEMU MSHV support
+ *
+ * Copyright Microsoft, Corp. 2025
+ *
+ * Authors:
+ * Magnus Kulke <magnuskulke@microsoft.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/lockable.h"
+#include "qemu/error-report.h"
+#include "qemu/rcu.h"
+#include "linux/mshv.h"
+#include "system/address-spaces.h"
+#include "system/mshv.h"
+#include "system/mshv_int.h"
+#include "exec/memattrs.h"
+#include <sys/ioctl.h>
+#include "trace.h"
+
+typedef struct SlotsRCUReclaim {
+ struct rcu_head rcu;
+ GList *old_head;
+ MshvMemorySlot *removed_slot;
+} SlotsRCUReclaim;
+
+static void rcu_reclaim_slotlist(struct rcu_head *rcu)
+{
+ SlotsRCUReclaim *r = container_of(rcu, SlotsRCUReclaim, rcu);
+ g_list_free(r->old_head);
+ g_free(r->removed_slot);
+ g_free(r);
+}
+
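+/*
+ * Atomically publish the new slot list and defer freeing the old list
+ * head (and, on removal, the slot itself) until concurrent RCU readers
+ * have finished with it.
+ */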
+static void publish_slots(GList *new_head, GList *old_head,
+ MshvMemorySlot *removed_slot)
+{
+ MshvMemorySlotManager *manager = &mshv_state->msm;
+
+ assert(manager);
+ qatomic_store_release(&manager->slots, new_head);
+
+ SlotsRCUReclaim *r = g_new(SlotsRCUReclaim, 1);
+ r->old_head = old_head;
+ r->removed_slot = removed_slot;
+
+ call_rcu1(&r->rcu, rcu_reclaim_slotlist);
+}
+
+/* Needs to be called with mshv_state->msm.mutex held */
+static int remove_slot(MshvMemorySlot *slot)
+{
+ GList *old_head, *new_head;
+ MshvMemorySlotManager *manager = &mshv_state->msm;
+
+ assert(manager);
+ old_head = qatomic_load_acquire(&manager->slots);
+
+ if (!g_list_find(old_head, slot)) {
+ error_report("slot requested for removal not found");
+ return -1;
+ }
+
+ new_head = g_list_copy(old_head);
+ new_head = g_list_remove(new_head, slot);
+ manager->n_slots--;
+
+ publish_slots(new_head, old_head, slot);
+
+ return 0;
+}
+
+/* Needs to be called with mshv_state->msm.mutex held */
+static MshvMemorySlot *append_slot(uint64_t gpa, uint64_t userspace_addr,
+ uint64_t size, bool readonly)
+{
+ GList *old_head, *new_head;
+ MshvMemorySlot *slot;
+ MshvMemorySlotManager *manager = &mshv_state->msm;
+
+ assert(manager);
+
+ old_head = qatomic_load_acquire(&manager->slots);
+
+ if (manager->n_slots >= MSHV_MAX_MEM_SLOTS) {
+ error_report("no free memory slots available");
+ return NULL;
+ }
+
+ slot = g_new0(MshvMemorySlot, 1);
+ slot->guest_phys_addr = gpa;
+ slot->userspace_addr = userspace_addr;
+ slot->memory_size = size;
+ slot->readonly = readonly;
+
+ new_head = g_list_copy(old_head);
+ new_head = g_list_append(new_head, slot);
+ manager->n_slots++;
+
+ publish_slots(new_head, old_head, NULL);
+
+ return slot;
+}
+
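+/*
+ * GCompareFunc-style helper for g_list_find_custom(): returns 0 if the
+ * two slots overlap in userspace (a match), -1 otherwise. A slot does
+ * not count as overlapping itself.
+ */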
+static int slot_overlaps(const MshvMemorySlot *slot1,
+ const MshvMemorySlot *slot2)
+{
+ uint64_t start_1 = slot1->userspace_addr,
+ start_2 = slot2->userspace_addr;
+ size_t len_1 = slot1->memory_size,
+ len_2 = slot2->memory_size;
+
+ if (slot1 == slot2) {
+ return -1;
+ }
+
+ return ranges_overlap(start_1, len_1, start_2, len_2) ? 0 : -1;
+}
+
+static bool is_mapped(MshvMemorySlot *slot)
+{
+ /* Subsequent reads of mapped field see a fully-initialized slot */
+ return qatomic_load_acquire(&slot->mapped);
+}
+
+/*
+ * Find slot that is:
+ * - overlapping in userspace
+ * - currently mapped in the guest
+ *
+ * Needs to be called with mshv_state->msm.mutex or RCU read lock held.
+ */
+static MshvMemorySlot *find_overlap_mem_slot(GList *head, MshvMemorySlot *slot)
+{
+ GList *found;
+ MshvMemorySlot *overlap_slot;
+
+ found = g_list_find_custom(head, slot, (GCompareFunc) slot_overlaps);
+
+ if (!found) {
+ return NULL;
+ }
+
+ overlap_slot = found->data;
+ if (!overlap_slot || !is_mapped(overlap_slot)) {
+ return NULL;
+ }
+
+ return overlap_slot;
+}
+
+static int set_guest_memory(int vm_fd,
+ const struct mshv_user_mem_region *region)
+{
+ int ret;
+
+ ret = ioctl(vm_fd, MSHV_SET_GUEST_MEMORY, region);
+ if (ret < 0) {
+ error_report("failed to set guest memory: %s", strerror(errno));
+ return -1;
+ }
+
+ return 0;
+}
+
+static int map_or_unmap(int vm_fd, const MshvMemorySlot *slot, bool map)
+{
+ struct mshv_user_mem_region region = {0};
+
+ region.guest_pfn = slot->guest_phys_addr >> MSHV_PAGE_SHIFT;
+ region.size = slot->memory_size;
+ region.userspace_addr = slot->userspace_addr;
+
+ if (!map) {
+ region.flags |= (1 << MSHV_SET_MEM_BIT_UNMAP);
+ trace_mshv_unmap_memory(slot->userspace_addr, slot->guest_phys_addr,
+ slot->memory_size);
+ return set_guest_memory(vm_fd, &region);
+ }
+
+ region.flags = BIT(MSHV_SET_MEM_BIT_EXECUTABLE);
+ if (!slot->readonly) {
+ region.flags |= BIT(MSHV_SET_MEM_BIT_WRITABLE);
+ }
+
+ trace_mshv_map_memory(slot->userspace_addr, slot->guest_phys_addr,
+ slot->memory_size);
+ return set_guest_memory(vm_fd, &region);
+}
+
+static int slot_matches_region(const MshvMemorySlot *slot1,
+ const MshvMemorySlot *slot2)
+{
+ return (slot1->guest_phys_addr == slot2->guest_phys_addr &&
+ slot1->userspace_addr == slot2->userspace_addr &&
+ slot1->memory_size == slot2->memory_size) ? 0 : -1;
+}
+
+/* Needs to be called with mshv_state->msm.mutex held */
+static MshvMemorySlot *find_mem_slot_by_region(uint64_t gpa, uint64_t size,
+ uint64_t userspace_addr)
+{
+ MshvMemorySlot ref_slot = {
+ .guest_phys_addr = gpa,
+ .userspace_addr = userspace_addr,
+ .memory_size = size,
+ };
+ GList *found;
+ MshvMemorySlotManager *manager = &mshv_state->msm;
+
+ assert(manager);
+ found = g_list_find_custom(manager->slots, &ref_slot,
+ (GCompareFunc) slot_matches_region);
+
+ return found ? found->data : NULL;
+}
+
+static int slot_covers_gpa(const MshvMemorySlot *slot, uint64_t *gpa_p)
+{
+ uint64_t gpa_offset, gpa = *gpa_p;
+
+ gpa_offset = gpa - slot->guest_phys_addr;
+ return (slot->guest_phys_addr <= gpa && gpa_offset < slot->memory_size)
+ ? 0 : -1;
+}
+
+/* Needs to be called with mshv_state->msm.mutex or RCU read lock held */
+static MshvMemorySlot *find_mem_slot_by_gpa(GList *head, uint64_t gpa)
+{
+ GList *found;
+ MshvMemorySlot *slot;
+
+ trace_mshv_find_slot_by_gpa(gpa);
+
+ found = g_list_find_custom(head, &gpa, (GCompareFunc) slot_covers_gpa);
+ if (found) {
+ slot = found->data;
+ trace_mshv_found_slot(slot->userspace_addr, slot->guest_phys_addr,
+ slot->memory_size);
+ return slot;
+ }
+
+ return NULL;
+}
+
+/* Needs to be called with mshv_state->msm.mutex held */
+static void set_mapped(MshvMemorySlot *slot, bool mapped)
+{
+ /* make prior writes to the slot visible before readers observe mapped */
+ qatomic_store_release(&slot->mapped, mapped);
+}
+
+MshvRemapResult mshv_remap_overlap_region(int vm_fd, uint64_t gpa)
+{
+ MshvMemorySlot *gpa_slot, *overlap_slot;
+ GList *head;
+ int ret;
+ MshvMemorySlotManager *manager = &mshv_state->msm;
+
+ /* fast path, called often by unmapped_gpa vm exit */
+ WITH_RCU_READ_LOCK_GUARD() {
+ assert(manager);
+ head = qatomic_load_acquire(&manager->slots);
+ /* return early if no slot is found */
+ gpa_slot = find_mem_slot_by_gpa(head, gpa);
+ if (gpa_slot == NULL) {
+ return MshvRemapNoMapping;
+ }
+
+ /* return early if no overlapping slot is found */
+ overlap_slot = find_overlap_mem_slot(head, gpa_slot);
+ if (overlap_slot == NULL) {
+ return MshvRemapNoOverlap;
+ }
+ }
+
+ /*
+ * We'll modify the mapping list, so we need to upgrade to mutex and
+ * recheck.
+ */
+ assert(manager);
+ QEMU_LOCK_GUARD(&manager->mutex);
+
+ /* return early if no slot is found */
+ gpa_slot = find_mem_slot_by_gpa(manager->slots, gpa);
+ if (gpa_slot == NULL) {
+ return MshvRemapNoMapping;
+ }
+
+ /* return early if no overlapping slot is found */
+ overlap_slot = find_overlap_mem_slot(manager->slots, gpa_slot);
+ if (overlap_slot == NULL) {
+ return MshvRemapNoOverlap;
+ }
+
+ /* unmap overlapping slot */
+ ret = map_or_unmap(vm_fd, overlap_slot, false);
+ if (ret < 0) {
+ error_report("failed to unmap overlap region");
+ abort();
+ }
+ set_mapped(overlap_slot, false);
+ warn_report("mapped out userspace_addr=0x%016lx gpa=0x%010lx size=0x%lx",
+ overlap_slot->userspace_addr,
+ overlap_slot->guest_phys_addr,
+ overlap_slot->memory_size);
+
+ /* map region for gpa */
+ ret = map_or_unmap(vm_fd, gpa_slot, true);
+ if (ret < 0) {
+ error_report("failed to map new region");
+ abort();
+ }
+ set_mapped(gpa_slot, true);
+ warn_report("mapped in userspace_addr=0x%016lx gpa=0x%010lx size=0x%lx",
+ gpa_slot->userspace_addr, gpa_slot->guest_phys_addr,
+ gpa_slot->memory_size);
+
+ return MshvRemapOk;
+}
+
+static int handle_unmapped_mmio_region_read(uint64_t gpa, uint64_t size,
+ uint8_t *data)
+{
+ warn_report("read from unmapped mmio region gpa=0x%lx size=%lu", gpa, size);
+
+ if (size == 0 || size > 8) {
+ error_report("invalid size %lu for reading from unmapped mmio region",
+ size);
+ return -1;
+ }
+
+ memset(data, 0xFF, size);
+
+ return 0;
+}
+
+int mshv_guest_mem_read(uint64_t gpa, uint8_t *data, uintptr_t size,
+ bool is_secure_mode, bool instruction_fetch)
+{
+ int ret;
+ MemTxAttrs memattr = { .secure = is_secure_mode };
+
+ if (instruction_fetch) {
+ trace_mshv_insn_fetch(gpa, size);
+ } else {
+ trace_mshv_mem_read(gpa, size);
+ }
+
+ ret = address_space_rw(&address_space_memory, gpa, memattr, (void *)data,
+ size, false);
+ if (ret == MEMTX_OK) {
+ return 0;
+ }
+
+ if (ret == MEMTX_DECODE_ERROR) {
+ return handle_unmapped_mmio_region_read(gpa, size, data);
+ }
+
+ error_report("failed to read guest memory at 0x%lx", gpa);
+ return -1;
+}
+
+int mshv_guest_mem_write(uint64_t gpa, const uint8_t *data, uintptr_t size,
+ bool is_secure_mode)
+{
+ int ret;
+ MemTxAttrs memattr = { .secure = is_secure_mode };
+
+ trace_mshv_mem_write(gpa, size);
+ ret = address_space_rw(&address_space_memory, gpa, memattr, (void *)data,
+ size, true);
+ if (ret == MEMTX_OK) {
+ return 0;
+ }
+
+ if (ret == MEMTX_DECODE_ERROR) {
+ warn_report("write to unmapped mmio region gpa=0x%lx size=%lu", gpa,
+ size);
+ return 0;
+ }
+
+ error_report("Failed to write guest memory");
+ return -1;
+}
+
+static int tracked_unmap(int vm_fd, uint64_t gpa, uint64_t size,
+ uint64_t userspace_addr)
+{
+ int ret;
+ MshvMemorySlot *slot;
+ MshvMemorySlotManager *manager = &mshv_state->msm;
+
+ assert(manager);
+
+ QEMU_LOCK_GUARD(&manager->mutex);
+
+ slot = find_mem_slot_by_region(gpa, size, userspace_addr);
+ if (!slot) {
+ trace_mshv_skip_unset_mem(userspace_addr, gpa, size);
+ /* no work to do */
+ return 0;
+ }
+
+ if (!is_mapped(slot)) {
+ /* remove slot, no need to unmap */
+ return remove_slot(slot);
+ }
+
+ ret = map_or_unmap(vm_fd, slot, false);
+ if (ret < 0) {
+ error_report("failed to unmap memory region");
+ return ret;
+ }
+ return remove_slot(slot);
+}
+
+static int tracked_map(int vm_fd, uint64_t gpa, uint64_t size, bool readonly,
+ uint64_t userspace_addr)
+{
+ MshvMemorySlot *slot, *overlap_slot;
+ int ret;
+ MshvMemorySlotManager *manager = &mshv_state->msm;
+
+ assert(manager);
+
+ QEMU_LOCK_GUARD(&manager->mutex);
+
+ slot = find_mem_slot_by_region(gpa, size, userspace_addr);
+ if (slot) {
+ error_report("memory region already mapped at gpa=0x%lx, "
+ "userspace_addr=0x%lx, size=0x%lx",
+ slot->guest_phys_addr, slot->userspace_addr,
+ slot->memory_size);
+ return -1;
+ }
+
+ slot = append_slot(gpa, userspace_addr, size, readonly);
+
+ overlap_slot = find_overlap_mem_slot(manager->slots, slot);
+ if (overlap_slot) {
+ trace_mshv_remap_attempt(slot->userspace_addr,
+ slot->guest_phys_addr,
+ slot->memory_size);
+ warn_report("attempt to map region [0x%lx-0x%lx], while "
+ "[0x%lx-0x%lx] is already mapped in the guest",
+ userspace_addr, userspace_addr + size - 1,
+ overlap_slot->userspace_addr,
+ overlap_slot->userspace_addr +
+ overlap_slot->memory_size - 1);
+
+ /* do not register mem slot in hv, but record for later swap-in */
+ set_mapped(slot, false);
+
+ return 0;
+ }
+
+ ret = map_or_unmap(vm_fd, slot, true);
+ if (ret < 0) {
+ error_report("failed to map memory region");
+ return -1;
+ }
+ set_mapped(slot, true);
+
+ return 0;
+}
+
+static int set_memory(uint64_t gpa, uint64_t size, bool readonly,
+ uint64_t userspace_addr, bool add)
+{
+ int vm_fd = mshv_state->vm;
+
+ if (add) {
+ return tracked_map(vm_fd, gpa, size, readonly, userspace_addr);
+ }
+
+ return tracked_unmap(vm_fd, gpa, size, userspace_addr);
+}
+
+/*
+ * Calculate and align the start address and the size of the section.
+ * Return the size. If the size is 0, the aligned section is empty.
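+ *
+ * E.g. a 0x2000 byte section at offset 0x1800 with a 4KiB host page
+ * size yields *start = 0x2000 and a returned size of 0x1000.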
+ */
+static hwaddr align_section(MemoryRegionSection *section, hwaddr *start)
+{
+ hwaddr size = int128_get64(section->size);
+ hwaddr delta, aligned;
+
+ /*
+ * This function works in page-size chunks, but it may be called with
+ * a sub-page size and an unaligned start address. Round the start
+ * address up to the next page boundary and truncate the size down to
+ * the previous one.
+ */
+ aligned = ROUND_UP(section->offset_within_address_space,
+ qemu_real_host_page_size());
+ delta = aligned - section->offset_within_address_space;
+ *start = aligned;
+ if (delta > size) {
+ return 0;
+ }
+
+ return (size - delta) & qemu_real_host_page_mask();
+}
+
+void mshv_set_phys_mem(MshvMemoryListener *mml, MemoryRegionSection *section,
+ bool add)
+{
+ int ret = 0;
+ MemoryRegion *area = section->mr;
+ bool writable = !area->readonly && !area->rom_device;
+ hwaddr start_addr, mr_offset, size;
+ void *ram;
+
+ size = align_section(section, &start_addr);
+ trace_mshv_set_phys_mem(add, section->mr->name, start_addr);
+
+ /*
+ * A writable non-RAM area is not mapped into guest memory at all. A
+ * read-only area that is not in ROMD mode has its mshv mapping
+ * removed instead, so that accesses trap and can be handled as MMIO.
+ */
+ if (!memory_region_is_ram(area)) {
+ if (writable) {
+ return;
+ } else if (!area->romd_mode) {
+ add = false;
+ }
+ }
+
+ if (!size) {
+ return;
+ }
+
+ mr_offset = section->offset_within_region + start_addr -
+ section->offset_within_address_space;
+
+ ram = memory_region_get_ram_ptr(area) + mr_offset;
+
+ ret = set_memory(start_addr, size, !writable, (uint64_t)ram, add);
+ if (ret < 0) {
+ error_report("failed to set memory region");
+ abort();
+ }
+}
+
+void mshv_init_memory_slot_manager(MshvState *mshv_state)
+{
+ MshvMemorySlotManager *manager;
+
+ assert(mshv_state);
+ manager = &mshv_state->msm;
+
+ manager->n_slots = 0;
+ manager->slots = NULL;
+ qemu_mutex_init(&manager->mutex);
+}
diff --git a/accel/mshv/meson.build b/accel/mshv/meson.build
new file mode 100644
index 0000000..d3a2b32
--- /dev/null
+++ b/accel/mshv/meson.build
@@ -0,0 +1,9 @@
+mshv_ss = ss.source_set()
+mshv_ss.add(if_true: files(
+ 'irq.c',
+ 'mem.c',
+ 'msr.c',
+ 'mshv-all.c'
+))
+
+specific_ss.add_all(when: 'CONFIG_MSHV', if_true: mshv_ss)
diff --git a/accel/mshv/mshv-all.c b/accel/mshv/mshv-all.c
new file mode 100644
index 0000000..45174f7
--- /dev/null
+++ b/accel/mshv/mshv-all.c
@@ -0,0 +1,727 @@
+/*
+ * QEMU MSHV support
+ *
+ * Copyright Microsoft, Corp. 2025
+ *
+ * Authors:
+ * Ziqiao Zhou <ziqiaozhou@microsoft.com>
+ * Magnus Kulke <magnuskulke@microsoft.com>
+ * Jinank Jain <jinankjain@microsoft.com>
+ * Wei Liu <liuwe@microsoft.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/event_notifier.h"
+#include "qemu/module.h"
+#include "qemu/main-loop.h"
+#include "hw/boards.h"
+
+#include "hw/hyperv/hvhdk.h"
+#include "hw/hyperv/hvhdk_mini.h"
+#include "hw/hyperv/hvgdk.h"
+#include "hw/hyperv/hvgdk_mini.h"
+#include "linux/mshv.h"
+
+#include "qemu/accel.h"
+#include "qemu/guest-random.h"
+#include "accel/accel-ops.h"
+#include "accel/accel-cpu-ops.h"
+#include "system/cpus.h"
+#include "system/runstate.h"
+#include "system/accel-blocker.h"
+#include "system/address-spaces.h"
+#include "system/mshv.h"
+#include "system/mshv_int.h"
+#include "system/reset.h"
+#include "trace.h"
+#include <err.h>
+#include <stdint.h>
+#include <sys/ioctl.h>
+
+#define TYPE_MSHV_ACCEL ACCEL_CLASS_NAME("mshv")
+
+DECLARE_INSTANCE_CHECKER(MshvState, MSHV_STATE, TYPE_MSHV_ACCEL)
+
+bool mshv_allowed;
+
+MshvState *mshv_state;
+
+static int init_mshv(int *mshv_fd)
+{
+ int fd = open("/dev/mshv", O_RDWR | O_CLOEXEC);
+ if (fd < 0) {
+ error_report("Failed to open /dev/mshv: %s", strerror(errno));
+ return -1;
+ }
+ *mshv_fd = fd;
+ return 0;
+}
+
+/* freeze: 1 to pause, 0 to resume */
+static int set_time_freeze(int vm_fd, int freeze)
+{
+ int ret;
+ struct hv_input_set_partition_property in = {0};
+ in.property_code = HV_PARTITION_PROPERTY_TIME_FREEZE;
+ in.property_value = freeze;
+
+ struct mshv_root_hvcall args = {0};
+ args.code = HVCALL_SET_PARTITION_PROPERTY;
+ args.in_sz = sizeof(in);
+ args.in_ptr = (uint64_t)&in;
+
+ ret = mshv_hvcall(vm_fd, &args);
+ if (ret < 0) {
+ error_report("Failed to set time freeze");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int pause_vm(int vm_fd)
+{
+ int ret;
+
+ ret = set_time_freeze(vm_fd, 1);
+ if (ret < 0) {
+ error_report("Failed to pause partition: %s", strerror(errno));
+ return -1;
+ }
+
+ return 0;
+}
+
+static int resume_vm(int vm_fd)
+{
+ int ret;
+
+ ret = set_time_freeze(vm_fd, 0);
+ if (ret < 0) {
+ error_report("Failed to resume partition: %s", strerror(errno));
+ return -1;
+ }
+
+ return 0;
+}
+
+static int create_partition(int mshv_fd, int *vm_fd)
+{
+ int ret;
+ struct mshv_create_partition args = {0};
+
+ /* Initialize pt_flags with the desired features */
+ uint64_t pt_flags = (1ULL << MSHV_PT_BIT_LAPIC) |
+ (1ULL << MSHV_PT_BIT_X2APIC) |
+ (1ULL << MSHV_PT_BIT_GPA_SUPER_PAGES);
+
+ /* Set default isolation type */
+ uint64_t pt_isolation = MSHV_PT_ISOLATION_NONE;
+
+ args.pt_flags = pt_flags;
+ args.pt_isolation = pt_isolation;
+
+ ret = ioctl(mshv_fd, MSHV_CREATE_PARTITION, &args);
+ if (ret < 0) {
+ error_report("Failed to create partition: %s", strerror(errno));
+ return -1;
+ }
+
+ *vm_fd = ret;
+ return 0;
+}
+
+static int set_synthetic_proc_features(int vm_fd)
+{
+ int ret;
+ struct hv_input_set_partition_property in = {0};
+ union hv_partition_synthetic_processor_features features = {0};
+
+ /* Access the bitfield and set the desired features */
+ features.hypervisor_present = 1;
+ features.hv1 = 1;
+ features.access_partition_reference_counter = 1;
+ features.access_synic_regs = 1;
+ features.access_synthetic_timer_regs = 1;
+ features.access_partition_reference_tsc = 1;
+ features.access_frequency_regs = 1;
+ features.access_intr_ctrl_regs = 1;
+ features.access_vp_index = 1;
+ features.access_hypercall_regs = 1;
+ features.tb_flush_hypercalls = 1;
+ features.synthetic_cluster_ipi = 1;
+ features.direct_synthetic_timers = 1;
+
+ mshv_arch_amend_proc_features(&features);
+
+ in.property_code = HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES;
+ in.property_value = features.as_uint64[0];
+
+ struct mshv_root_hvcall args = {0};
+ args.code = HVCALL_SET_PARTITION_PROPERTY;
+ args.in_sz = sizeof(in);
+ args.in_ptr = (uint64_t)&in;
+
+ trace_mshv_hvcall_args("synthetic_proc_features", args.code, args.in_sz);
+
+ ret = mshv_hvcall(vm_fd, &args);
+ if (ret < 0) {
+ error_report("Failed to set synthethic proc features");
+ return -1;
+ }
+ return 0;
+}
+
+static int initialize_vm(int vm_fd)
+{
+ int ret = ioctl(vm_fd, MSHV_INITIALIZE_PARTITION);
+ if (ret < 0) {
+ error_report("Failed to initialize partition: %s", strerror(errno));
+ return -1;
+ }
+ return 0;
+}
+
+static int create_vm(int mshv_fd, int *vm_fd)
+{
+ int ret = create_partition(mshv_fd, vm_fd);
+ if (ret < 0) {
+ return -1;
+ }
+
+ ret = set_synthetic_proc_features(*vm_fd);
+ if (ret < 0) {
+ return -1;
+ }
+
+ ret = initialize_vm(*vm_fd);
+ if (ret < 0) {
+ return -1;
+ }
+
+ ret = mshv_reserve_ioapic_msi_routes(*vm_fd);
+ if (ret < 0) {
+ return -1;
+ }
+
+ ret = mshv_arch_post_init_vm(*vm_fd);
+ if (ret < 0) {
+ return -1;
+ }
+
+ /* Always create a frozen partition */
+ ret = pause_vm(*vm_fd);
+ if (ret < 0) {
+ return -1;
+ }
+
+ return 0;
+}
+
+static void mem_region_add(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ MshvMemoryListener *mml;
+ mml = container_of(listener, MshvMemoryListener, listener);
+ memory_region_ref(section->mr);
+ mshv_set_phys_mem(mml, section, true);
+}
+
+static void mem_region_del(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ MshvMemoryListener *mml;
+ mml = container_of(listener, MshvMemoryListener, listener);
+ mshv_set_phys_mem(mml, section, false);
+ memory_region_unref(section->mr);
+}
+
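+/*
+ * Optional datamatch value for an ioeventfd, as a tagged union; with a
+ * datamatch, the eventfd only fires when the guest writes the matching
+ * 32- or 64-bit value.
+ */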
+typedef enum {
+ DATAMATCH_NONE,
+ DATAMATCH_U32,
+ DATAMATCH_U64,
+} DatamatchTag;
+
+typedef struct {
+ DatamatchTag tag;
+ union {
+ uint32_t u32;
+ uint64_t u64;
+ } value;
+} Datamatch;
+
+/* flags select whether the ioeventfd is assigned or deassigned */
+static int ioeventfd(int vm_fd, int event_fd, uint64_t addr, Datamatch dm,
+ uint32_t flags)
+{
+ struct mshv_user_ioeventfd args = {0};
+ args.fd = event_fd;
+ args.addr = addr;
+ args.flags = flags;
+
+ if (dm.tag == DATAMATCH_NONE) {
+ args.datamatch = 0;
+ } else {
+ flags |= BIT(MSHV_IOEVENTFD_BIT_DATAMATCH);
+ args.flags = flags;
+ if (dm.tag == DATAMATCH_U64) {
+ args.len = sizeof(uint64_t);
+ args.datamatch = dm.value.u64;
+ } else {
+ args.len = sizeof(uint32_t);
+ args.datamatch = dm.value.u32;
+ }
+ }
+
+ return ioctl(vm_fd, MSHV_IOEVENTFD, &args);
+}
+
+static int unregister_ioevent(int vm_fd, int event_fd, uint64_t mmio_addr)
+{
+ uint32_t flags = 0;
+ Datamatch dm = {0};
+
+ flags |= BIT(MSHV_IOEVENTFD_BIT_DEASSIGN);
+ dm.tag = DATAMATCH_NONE;
+
+ return ioeventfd(vm_fd, event_fd, mmio_addr, dm, flags);
+}
+
+static int register_ioevent(int vm_fd, int event_fd, uint64_t mmio_addr,
+ uint64_t val, bool is_64bit, bool is_datamatch)
+{
+ uint32_t flags = 0;
+ Datamatch dm = {0};
+
+ if (!is_datamatch) {
+ dm.tag = DATAMATCH_NONE;
+ } else if (is_64bit) {
+ dm.tag = DATAMATCH_U64;
+ dm.value.u64 = val;
+ } else {
+ dm.tag = DATAMATCH_U32;
+ dm.value.u32 = val;
+ }
+
+ return ioeventfd(vm_fd, event_fd, mmio_addr, dm, flags);
+}
+
+static void mem_ioeventfd_add(MemoryListener *listener,
+ MemoryRegionSection *section,
+ bool match_data, uint64_t data,
+ EventNotifier *e)
+{
+ int fd = event_notifier_get_fd(e);
+ int ret;
+ bool is_64 = int128_get64(section->size) == 8;
+ uint64_t addr = section->offset_within_address_space;
+
+ trace_mshv_mem_ioeventfd_add(addr, int128_get64(section->size), data);
+
+ ret = register_ioevent(mshv_state->vm, fd, addr, data, is_64, match_data);
+
+ if (ret < 0) {
+ error_report("Failed to register ioeventfd: %s (%d)", strerror(-ret),
+ -ret);
+ abort();
+ }
+}
+
+static void mem_ioeventfd_del(MemoryListener *listener,
+ MemoryRegionSection *section,
+ bool match_data, uint64_t data,
+ EventNotifier *e)
+{
+ int fd = event_notifier_get_fd(e);
+ int ret;
+ uint64_t addr = section->offset_within_address_space;
+
+ trace_mshv_mem_ioeventfd_del(section->offset_within_address_space,
+ int128_get64(section->size), data);
+
+ ret = unregister_ioevent(mshv_state->vm, fd, addr);
+ if (ret < 0) {
+ error_report("Failed to unregister ioeventfd: %s (%d)", strerror(-ret),
+ -ret);
+ abort();
+ }
+}
+
+static MemoryListener mshv_memory_listener = {
+ .name = "mshv",
+ .priority = MEMORY_LISTENER_PRIORITY_ACCEL,
+ .region_add = mem_region_add,
+ .region_del = mem_region_del,
+ .eventfd_add = mem_ioeventfd_add,
+ .eventfd_del = mem_ioeventfd_del,
+};
+
+static MemoryListener mshv_io_listener = {
+ .name = "mshv", .priority = MEMORY_LISTENER_PRIORITY_DEV_BACKEND,
+ /* MSHV does not support PIO eventfd */
+};
+
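+/*
+ * Register a memory listener for the given address space and record the
+ * association in the first free MshvAddressSpace slot.
+ */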
+static void register_mshv_memory_listener(MshvState *s, MshvMemoryListener *mml,
+ AddressSpace *as, int as_id,
+ const char *name)
+{
+ int i;
+
+ mml->listener = mshv_memory_listener;
+ mml->listener.name = name;
+ memory_listener_register(&mml->listener, as);
+ for (i = 0; i < s->nr_as; ++i) {
+ if (!s->as[i].as) {
+ s->as[i].as = as;
+ s->as[i].ml = mml;
+ break;
+ }
+ }
+}
+
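+/* Thin wrapper around the MSHV_ROOT_HVCALL ioctl; returns -1 on failure */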
+int mshv_hvcall(int fd, const struct mshv_root_hvcall *args)
+{
+ int ret = 0;
+
+ ret = ioctl(fd, MSHV_ROOT_HVCALL, args);
+ if (ret < 0) {
+ error_report("Failed to perform hvcall: %s", strerror(errno));
+ return -1;
+ }
+ return ret;
+}
+
+static int mshv_init_vcpu(CPUState *cpu)
+{
+ int vm_fd = mshv_state->vm;
+ uint8_t vp_index = cpu->cpu_index;
+ int ret;
+
+ cpu->accel = g_new0(AccelCPUState, 1);
+ mshv_arch_init_vcpu(cpu);
+
+ ret = mshv_create_vcpu(vm_fd, vp_index, &cpu->accel->cpufd);
+ if (ret < 0) {
+ return -1;
+ }
+
+ cpu->accel->dirty = true;
+
+ return 0;
+}
+
+static int mshv_init(AccelState *as, MachineState *ms)
+{
+ MshvState *s;
+ int mshv_fd, vm_fd, ret;
+
+ if (mshv_state) {
+ warn_report("MSHV accelerator already initialized");
+ return 0;
+ }
+
+ s = MSHV_STATE(as);
+
+ accel_blocker_init();
+
+ s->vm = 0;
+
+ ret = init_mshv(&mshv_fd);
+ if (ret < 0) {
+ return -1;
+ }
+
+ mshv_init_mmio_emu();
+
+ mshv_init_msicontrol();
+
+ mshv_init_memory_slot_manager(s);
+
+ ret = create_vm(mshv_fd, &vm_fd);
+ if (ret < 0) {
+ close(mshv_fd);
+ return -1;
+ }
+
+ ret = resume_vm(vm_fd);
+ if (ret < 0) {
+ close(mshv_fd);
+ close(vm_fd);
+ return -1;
+ }
+
+ s->vm = vm_fd;
+ s->fd = mshv_fd;
+ s->nr_as = 1;
+ s->as = g_new0(MshvAddressSpace, s->nr_as);
+
+ mshv_state = s;
+
+ register_mshv_memory_listener(s, &s->memory_listener, &address_space_memory,
+ 0, "mshv-memory");
+ memory_listener_register(&mshv_io_listener, &address_space_io);
+
+ return 0;
+}
+
+static int mshv_destroy_vcpu(CPUState *cpu)
+{
+ int cpu_fd = mshv_vcpufd(cpu);
+ int vm_fd = mshv_state->vm;
+
+ mshv_remove_vcpu(vm_fd, cpu_fd);
+ mshv_vcpufd(cpu) = 0;
+
+ mshv_arch_destroy_vcpu(cpu);
+ g_clear_pointer(&cpu->accel, g_free);
+ return 0;
+}
+
+static int mshv_cpu_exec(CPUState *cpu)
+{
+ hv_message mshv_msg;
+ enum MshvVmExit exit_reason;
+ int ret = 0;
+
+ bql_unlock();
+ cpu_exec_start(cpu);
+
+ do {
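+ /* push any dirty QEMU-side register state to the hypervisor */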
+ if (cpu->accel->dirty) {
+ ret = mshv_arch_put_registers(cpu);
+ if (ret) {
+ error_report("Failed to put registers after init: %s",
+ strerror(-ret));
+ ret = -1;
+ break;
+ }
+ cpu->accel->dirty = false;
+ }
+
+ ret = mshv_run_vcpu(mshv_state->vm, cpu, &mshv_msg, &exit_reason);
+ if (ret < 0) {
+ error_report("Failed to run on vcpu %d", cpu->cpu_index);
+ abort();
+ }
+
+ switch (exit_reason) {
+ case MshvVmExitIgnore:
+ break;
+ default:
+ ret = EXCP_INTERRUPT;
+ break;
+ }
+ } while (ret == 0);
+
+ cpu_exec_end(cpu);
+ bql_lock();
+
+ if (ret < 0) {
+ cpu_dump_state(cpu, stderr, CPU_DUMP_CODE);
+ vm_stop(RUN_STATE_INTERNAL_ERROR);
+ }
+
+ return ret;
+}
+
+/*
+ * The signal handler runs on a vCPU thread when it receives a SIG_IPI
+ * (SIGUSR1). The signal kicks the vCPU, forcing a VM exit whose exit
+ * reason breaks the run loop (see mshv_cpu_exec). If the exit was
+ * triggered by a Ctrl-a x command, the system shuts down; otherwise
+ * execution continues.
+ */
+static void sa_ipi_handler(int sig)
+{
+ /* TODO: call IOCTL to set_immediate_exit, once implemented. */
+
+ qemu_cpu_kick_self();
+}
+
+static void init_signal(CPUState *cpu)
+{
+ /* init cpu signals */
+ struct sigaction sigact;
+ sigset_t set;
+
+ memset(&sigact, 0, sizeof(sigact));
+ sigact.sa_handler = sa_ipi_handler;
+ sigaction(SIG_IPI, &sigact, NULL);
+
+ pthread_sigmask(SIG_BLOCK, NULL, &set);
+ sigdelset(&set, SIG_IPI);
+ pthread_sigmask(SIG_SETMASK, &set, NULL);
+}
+
+static void *mshv_vcpu_thread(void *arg)
+{
+ CPUState *cpu = arg;
+ int ret;
+
+ rcu_register_thread();
+
+ bql_lock();
+ qemu_thread_get_self(cpu->thread);
+ cpu->thread_id = qemu_get_thread_id();
+ current_cpu = cpu;
+ ret = mshv_init_vcpu(cpu);
+ if (ret < 0) {
+ error_report("Failed to init vcpu %d", cpu->cpu_index);
+ goto cleanup;
+ }
+ init_signal(cpu);
+
+ /* signal CPU creation */
+ cpu_thread_signal_created(cpu);
+ qemu_guest_random_seed_thread_part2(cpu->random_seed);
+
+ do {
+ qemu_process_cpu_events(cpu);
+ if (cpu_can_run(cpu)) {
+ mshv_cpu_exec(cpu);
+ }
+ } while (!cpu->unplug || cpu_can_run(cpu));
+
+ mshv_destroy_vcpu(cpu);
+cleanup:
+ cpu_thread_signal_destroyed(cpu);
+ bql_unlock();
+ rcu_unregister_thread();
+ return NULL;
+}
+
+static void mshv_start_vcpu_thread(CPUState *cpu)
+{
+ char thread_name[VCPU_THREAD_NAME_SIZE];
+
+ cpu->thread = g_malloc0(sizeof(QemuThread));
+ cpu->halt_cond = g_malloc0(sizeof(QemuCond));
+
+ qemu_cond_init(cpu->halt_cond);
+
+ snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/MSHV",
+ cpu->cpu_index);
+ trace_mshv_start_vcpu_thread(thread_name, cpu->cpu_index);
+ qemu_thread_create(cpu->thread, thread_name, mshv_vcpu_thread, cpu,
+ QEMU_THREAD_JOINABLE);
+}
+
+static void do_mshv_cpu_synchronize_post_init(CPUState *cpu,
+ run_on_cpu_data arg)
+{
+ int ret = mshv_arch_put_registers(cpu);
+ if (ret < 0) {
+ error_report("Failed to put registers after init: %s", strerror(-ret));
+ abort();
+ }
+
+ cpu->accel->dirty = false;
+}
+
+static void mshv_cpu_synchronize_post_init(CPUState *cpu)
+{
+ run_on_cpu(cpu, do_mshv_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
+}
+
+static void mshv_cpu_synchronize_post_reset(CPUState *cpu)
+{
+ int ret = mshv_arch_put_registers(cpu);
+ if (ret) {
+ error_report("Failed to put registers after reset: %s",
+ strerror(-ret));
+ cpu_dump_state(cpu, stderr, CPU_DUMP_CODE);
+ vm_stop(RUN_STATE_INTERNAL_ERROR);
+ }
+ cpu->accel->dirty = false;
+}
+
+static void do_mshv_cpu_synchronize_pre_loadvm(CPUState *cpu,
+ run_on_cpu_data arg)
+{
+ cpu->accel->dirty = true;
+}
+
+static void mshv_cpu_synchronize_pre_loadvm(CPUState *cpu)
+{
+ run_on_cpu(cpu, do_mshv_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
+}
+
+static void do_mshv_cpu_synchronize(CPUState *cpu, run_on_cpu_data arg)
+{
+ if (!cpu->accel->dirty) {
+ int ret = mshv_load_regs(cpu);
+ if (ret < 0) {
+ error_report("Failed to load registers for vcpu %d",
+ cpu->cpu_index);
+
+ cpu_dump_state(cpu, stderr, CPU_DUMP_CODE);
+ vm_stop(RUN_STATE_INTERNAL_ERROR);
+ }
+
+ cpu->accel->dirty = true;
+ }
+}
+
+static void mshv_cpu_synchronize(CPUState *cpu)
+{
+ if (!cpu->accel->dirty) {
+ run_on_cpu(cpu, do_mshv_cpu_synchronize, RUN_ON_CPU_NULL);
+ }
+}
+
+static bool mshv_cpus_are_resettable(void)
+{
+ return false;
+}
+
+static void mshv_accel_class_init(ObjectClass *oc, const void *data)
+{
+ AccelClass *ac = ACCEL_CLASS(oc);
+
+ ac->name = "MSHV";
+ ac->init_machine = mshv_init;
+ ac->allowed = &mshv_allowed;
+}
+
+static void mshv_accel_instance_init(Object *obj)
+{
+ MshvState *s = MSHV_STATE(obj);
+
+ s->vm = 0;
+}
+
+static const TypeInfo mshv_accel_type = {
+ .name = TYPE_MSHV_ACCEL,
+ .parent = TYPE_ACCEL,
+ .instance_init = mshv_accel_instance_init,
+ .class_init = mshv_accel_class_init,
+ .instance_size = sizeof(MshvState),
+};
+
+static void mshv_accel_ops_class_init(ObjectClass *oc, const void *data)
+{
+ AccelOpsClass *ops = ACCEL_OPS_CLASS(oc);
+
+ ops->create_vcpu_thread = mshv_start_vcpu_thread;
+ ops->synchronize_post_init = mshv_cpu_synchronize_post_init;
+ ops->synchronize_post_reset = mshv_cpu_synchronize_post_reset;
+ ops->synchronize_state = mshv_cpu_synchronize;
+ ops->synchronize_pre_loadvm = mshv_cpu_synchronize_pre_loadvm;
+ ops->cpus_are_resettable = mshv_cpus_are_resettable;
+ ops->handle_interrupt = generic_handle_interrupt;
+}
+
+static const TypeInfo mshv_accel_ops_type = {
+ .name = ACCEL_OPS_NAME("mshv"),
+ .parent = TYPE_ACCEL_OPS,
+ .class_init = mshv_accel_ops_class_init,
+ .abstract = true,
+};
+
+static void mshv_type_init(void)
+{
+ type_register_static(&mshv_accel_type);
+ type_register_static(&mshv_accel_ops_type);
+}
+
+type_init(mshv_type_init);
diff --git a/accel/mshv/msr.c b/accel/mshv/msr.c
new file mode 100644
index 0000000..e6e5bae
--- /dev/null
+++ b/accel/mshv/msr.c
@@ -0,0 +1,375 @@
+/*
+ * QEMU MSHV support
+ *
+ * Copyright Microsoft, Corp. 2025
+ *
+ * Authors: Magnus Kulke <magnuskulke@microsoft.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "system/mshv.h"
+#include "system/mshv_int.h"
+#include "hw/hyperv/hvgdk_mini.h"
+#include "linux/mshv.h"
+#include "qemu/error-report.h"
+
+static uint32_t supported_msrs[64] = {
+ IA32_MSR_TSC,
+ IA32_MSR_EFER,
+ IA32_MSR_KERNEL_GS_BASE,
+ IA32_MSR_APIC_BASE,
+ IA32_MSR_PAT,
+ IA32_MSR_SYSENTER_CS,
+ IA32_MSR_SYSENTER_ESP,
+ IA32_MSR_SYSENTER_EIP,
+ IA32_MSR_STAR,
+ IA32_MSR_LSTAR,
+ IA32_MSR_CSTAR,
+ IA32_MSR_SFMASK,
+ IA32_MSR_MTRR_DEF_TYPE,
+ IA32_MSR_MTRR_PHYSBASE0,
+ IA32_MSR_MTRR_PHYSMASK0,
+ IA32_MSR_MTRR_PHYSBASE1,
+ IA32_MSR_MTRR_PHYSMASK1,
+ IA32_MSR_MTRR_PHYSBASE2,
+ IA32_MSR_MTRR_PHYSMASK2,
+ IA32_MSR_MTRR_PHYSBASE3,
+ IA32_MSR_MTRR_PHYSMASK3,
+ IA32_MSR_MTRR_PHYSBASE4,
+ IA32_MSR_MTRR_PHYSMASK4,
+ IA32_MSR_MTRR_PHYSBASE5,
+ IA32_MSR_MTRR_PHYSMASK5,
+ IA32_MSR_MTRR_PHYSBASE6,
+ IA32_MSR_MTRR_PHYSMASK6,
+ IA32_MSR_MTRR_PHYSBASE7,
+ IA32_MSR_MTRR_PHYSMASK7,
+ IA32_MSR_MTRR_FIX64K_00000,
+ IA32_MSR_MTRR_FIX16K_80000,
+ IA32_MSR_MTRR_FIX16K_A0000,
+ IA32_MSR_MTRR_FIX4K_C0000,
+ IA32_MSR_MTRR_FIX4K_C8000,
+ IA32_MSR_MTRR_FIX4K_D0000,
+ IA32_MSR_MTRR_FIX4K_D8000,
+ IA32_MSR_MTRR_FIX4K_E0000,
+ IA32_MSR_MTRR_FIX4K_E8000,
+ IA32_MSR_MTRR_FIX4K_F0000,
+ IA32_MSR_MTRR_FIX4K_F8000,
+ IA32_MSR_TSC_AUX,
+ IA32_MSR_DEBUG_CTL,
+ HV_X64_MSR_GUEST_OS_ID,
+ HV_X64_MSR_SINT0,
+ HV_X64_MSR_SINT1,
+ HV_X64_MSR_SINT2,
+ HV_X64_MSR_SINT3,
+ HV_X64_MSR_SINT4,
+ HV_X64_MSR_SINT5,
+ HV_X64_MSR_SINT6,
+ HV_X64_MSR_SINT7,
+ HV_X64_MSR_SINT8,
+ HV_X64_MSR_SINT9,
+ HV_X64_MSR_SINT10,
+ HV_X64_MSR_SINT11,
+ HV_X64_MSR_SINT12,
+ HV_X64_MSR_SINT13,
+ HV_X64_MSR_SINT14,
+ HV_X64_MSR_SINT15,
+ HV_X64_MSR_SCONTROL,
+ HV_X64_MSR_SIEFP,
+ HV_X64_MSR_SIMP,
+ HV_X64_MSR_REFERENCE_TSC,
+ HV_X64_MSR_EOM,
+};
+static const size_t msr_count = ARRAY_SIZE(supported_msrs);
+
+static int compare_msr_index(const void *a, const void *b)
+{
+ uint32_t x = *(const uint32_t *)a, y = *(const uint32_t *)b;
+
+ return (x > y) - (x < y);
+}
+
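+/* Sort the MSR table once at startup, so lookups can use bsearch() */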
+__attribute__((constructor))
+static void init_sorted_msr_map(void)
+{
+ qsort(supported_msrs, msr_count, sizeof(uint32_t), compare_msr_index);
+}
+
+static int mshv_is_supported_msr(uint32_t msr)
+{
+ return bsearch(&msr, supported_msrs, msr_count, sizeof(uint32_t),
+ compare_msr_index) != NULL;
+}
+
+static int mshv_msr_to_hv_reg_name(uint32_t msr, uint32_t *hv_reg)
+{
+ switch (msr) {
+ case IA32_MSR_TSC:
+ *hv_reg = HV_X64_REGISTER_TSC;
+ return 0;
+ case IA32_MSR_EFER:
+ *hv_reg = HV_X64_REGISTER_EFER;
+ return 0;
+ case IA32_MSR_KERNEL_GS_BASE:
+ *hv_reg = HV_X64_REGISTER_KERNEL_GS_BASE;
+ return 0;
+ case IA32_MSR_APIC_BASE:
+ *hv_reg = HV_X64_REGISTER_APIC_BASE;
+ return 0;
+ case IA32_MSR_PAT:
+ *hv_reg = HV_X64_REGISTER_PAT;
+ return 0;
+ case IA32_MSR_SYSENTER_CS:
+ *hv_reg = HV_X64_REGISTER_SYSENTER_CS;
+ return 0;
+ case IA32_MSR_SYSENTER_ESP:
+ *hv_reg = HV_X64_REGISTER_SYSENTER_ESP;
+ return 0;
+ case IA32_MSR_SYSENTER_EIP:
+ *hv_reg = HV_X64_REGISTER_SYSENTER_EIP;
+ return 0;
+ case IA32_MSR_STAR:
+ *hv_reg = HV_X64_REGISTER_STAR;
+ return 0;
+ case IA32_MSR_LSTAR:
+ *hv_reg = HV_X64_REGISTER_LSTAR;
+ return 0;
+ case IA32_MSR_CSTAR:
+ *hv_reg = HV_X64_REGISTER_CSTAR;
+ return 0;
+ case IA32_MSR_SFMASK:
+ *hv_reg = HV_X64_REGISTER_SFMASK;
+ return 0;
+ case IA32_MSR_MTRR_CAP:
+ *hv_reg = HV_X64_REGISTER_MSR_MTRR_CAP;
+ return 0;
+ case IA32_MSR_MTRR_DEF_TYPE:
+ *hv_reg = HV_X64_REGISTER_MSR_MTRR_DEF_TYPE;
+ return 0;
+ case IA32_MSR_MTRR_PHYSBASE0:
+ *hv_reg = HV_X64_REGISTER_MSR_MTRR_PHYS_BASE0;
+ return 0;
+ case IA32_MSR_MTRR_PHYSMASK0:
+ *hv_reg = HV_X64_REGISTER_MSR_MTRR_PHYS_MASK0;
+ return 0;
+ case IA32_MSR_MTRR_PHYSBASE1:
+ *hv_reg = HV_X64_REGISTER_MSR_MTRR_PHYS_BASE1;
+ return 0;
+ case IA32_MSR_MTRR_PHYSMASK1:
+ *hv_reg = HV_X64_REGISTER_MSR_MTRR_PHYS_MASK1;
+ return 0;
+ case IA32_MSR_MTRR_PHYSBASE2:
+ *hv_reg = HV_X64_REGISTER_MSR_MTRR_PHYS_BASE2;
+ return 0;
+ case IA32_MSR_MTRR_PHYSMASK2:
+ *hv_reg = HV_X64_REGISTER_MSR_MTRR_PHYS_MASK2;
+ return 0;
+ case IA32_MSR_MTRR_PHYSBASE3:
+ *hv_reg = HV_X64_REGISTER_MSR_MTRR_PHYS_BASE3;
+ return 0;
+ case IA32_MSR_MTRR_PHYSMASK3:
+ *hv_reg = HV_X64_REGISTER_MSR_MTRR_PHYS_MASK3;
+ return 0;
+ case IA32_MSR_MTRR_PHYSBASE4:
+ *hv_reg = HV_X64_REGISTER_MSR_MTRR_PHYS_BASE4;
+ return 0;
+ case IA32_MSR_MTRR_PHYSMASK4:
+ *hv_reg = HV_X64_REGISTER_MSR_MTRR_PHYS_MASK4;
+ return 0;
+ case IA32_MSR_MTRR_PHYSBASE5:
+ *hv_reg = HV_X64_REGISTER_MSR_MTRR_PHYS_BASE5;
+ return 0;
+ case IA32_MSR_MTRR_PHYSMASK5:
+ *hv_reg = HV_X64_REGISTER_MSR_MTRR_PHYS_MASK5;
+ return 0;
+ case IA32_MSR_MTRR_PHYSBASE6:
+ *hv_reg = HV_X64_REGISTER_MSR_MTRR_PHYS_BASE6;
+ return 0;
+ case IA32_MSR_MTRR_PHYSMASK6:
+ *hv_reg = HV_X64_REGISTER_MSR_MTRR_PHYS_MASK6;
+ return 0;
+ case IA32_MSR_MTRR_PHYSBASE7:
+ *hv_reg = HV_X64_REGISTER_MSR_MTRR_PHYS_BASE7;
+ return 0;
+ case IA32_MSR_MTRR_PHYSMASK7:
+ *hv_reg = HV_X64_REGISTER_MSR_MTRR_PHYS_MASK7;
+ return 0;
+ case IA32_MSR_MTRR_FIX64K_00000:
+ *hv_reg = HV_X64_REGISTER_MSR_MTRR_FIX64K00000;
+ return 0;
+ case IA32_MSR_MTRR_FIX16K_80000:
+ *hv_reg = HV_X64_REGISTER_MSR_MTRR_FIX16K80000;
+ return 0;
+ case IA32_MSR_MTRR_FIX16K_A0000:
+ *hv_reg = HV_X64_REGISTER_MSR_MTRR_FIX16KA0000;
+ return 0;
+ case IA32_MSR_MTRR_FIX4K_C0000:
+ *hv_reg = HV_X64_REGISTER_MSR_MTRR_FIX4KC0000;
+ return 0;
+ case IA32_MSR_MTRR_FIX4K_C8000:
+ *hv_reg = HV_X64_REGISTER_MSR_MTRR_FIX4KC8000;
+ return 0;
+ case IA32_MSR_MTRR_FIX4K_D0000:
+ *hv_reg = HV_X64_REGISTER_MSR_MTRR_FIX4KD0000;
+ return 0;
+ case IA32_MSR_MTRR_FIX4K_D8000:
+ *hv_reg = HV_X64_REGISTER_MSR_MTRR_FIX4KD8000;
+ return 0;
+ case IA32_MSR_MTRR_FIX4K_E0000:
+ *hv_reg = HV_X64_REGISTER_MSR_MTRR_FIX4KE0000;
+ return 0;
+ case IA32_MSR_MTRR_FIX4K_E8000:
+ *hv_reg = HV_X64_REGISTER_MSR_MTRR_FIX4KE8000;
+ return 0;
+ case IA32_MSR_MTRR_FIX4K_F0000:
+ *hv_reg = HV_X64_REGISTER_MSR_MTRR_FIX4KF0000;
+ return 0;
+ case IA32_MSR_MTRR_FIX4K_F8000:
+ *hv_reg = HV_X64_REGISTER_MSR_MTRR_FIX4KF8000;
+ return 0;
+ case IA32_MSR_TSC_AUX:
+ *hv_reg = HV_X64_REGISTER_TSC_AUX;
+ return 0;
+ case IA32_MSR_BNDCFGS:
+ *hv_reg = HV_X64_REGISTER_BNDCFGS;
+ return 0;
+ case IA32_MSR_DEBUG_CTL:
+ *hv_reg = HV_X64_REGISTER_DEBUG_CTL;
+ return 0;
+ case IA32_MSR_TSC_ADJUST:
+ *hv_reg = HV_X64_REGISTER_TSC_ADJUST;
+ return 0;
+ case IA32_MSR_SPEC_CTRL:
+ *hv_reg = HV_X64_REGISTER_SPEC_CTRL;
+ return 0;
+ case HV_X64_MSR_GUEST_OS_ID:
+ *hv_reg = HV_REGISTER_GUEST_OS_ID;
+ return 0;
+ case HV_X64_MSR_SINT0:
+ *hv_reg = HV_REGISTER_SINT0;
+ return 0;
+ case HV_X64_MSR_SINT1:
+ *hv_reg = HV_REGISTER_SINT1;
+ return 0;
+ case HV_X64_MSR_SINT2:
+ *hv_reg = HV_REGISTER_SINT2;
+ return 0;
+ case HV_X64_MSR_SINT3:
+ *hv_reg = HV_REGISTER_SINT3;
+ return 0;
+ case HV_X64_MSR_SINT4:
+ *hv_reg = HV_REGISTER_SINT4;
+ return 0;
+ case HV_X64_MSR_SINT5:
+ *hv_reg = HV_REGISTER_SINT5;
+ return 0;
+ case HV_X64_MSR_SINT6:
+ *hv_reg = HV_REGISTER_SINT6;
+ return 0;
+ case HV_X64_MSR_SINT7:
+ *hv_reg = HV_REGISTER_SINT7;
+ return 0;
+ case HV_X64_MSR_SINT8:
+ *hv_reg = HV_REGISTER_SINT8;
+ return 0;
+ case HV_X64_MSR_SINT9:
+ *hv_reg = HV_REGISTER_SINT9;
+ return 0;
+ case HV_X64_MSR_SINT10:
+ *hv_reg = HV_REGISTER_SINT10;
+ return 0;
+ case HV_X64_MSR_SINT11:
+ *hv_reg = HV_REGISTER_SINT11;
+ return 0;
+ case HV_X64_MSR_SINT12:
+ *hv_reg = HV_REGISTER_SINT12;
+ return 0;
+ case HV_X64_MSR_SINT13:
+ *hv_reg = HV_REGISTER_SINT13;
+ return 0;
+ case HV_X64_MSR_SINT14:
+ *hv_reg = HV_REGISTER_SINT14;
+ return 0;
+ case HV_X64_MSR_SINT15:
+ *hv_reg = HV_REGISTER_SINT15;
+ return 0;
+ case IA32_MSR_MISC_ENABLE:
+ *hv_reg = HV_X64_REGISTER_MSR_IA32_MISC_ENABLE;
+ return 0;
+ case HV_X64_MSR_SCONTROL:
+ *hv_reg = HV_REGISTER_SCONTROL;
+ return 0;
+ case HV_X64_MSR_SIEFP:
+ *hv_reg = HV_REGISTER_SIEFP;
+ return 0;
+ case HV_X64_MSR_SIMP:
+ *hv_reg = HV_REGISTER_SIMP;
+ return 0;
+ case HV_X64_MSR_REFERENCE_TSC:
+ *hv_reg = HV_REGISTER_REFERENCE_TSC;
+ return 0;
+ case HV_X64_MSR_EOM:
+ *hv_reg = HV_REGISTER_EOM;
+ return 0;
+ default:
+ error_report("failed to map MSR %u to HV register name", msr);
+ return -1;
+ }
+}
+
+static int set_msrs(const CPUState *cpu, GList *msrs)
+{
+ size_t n_msrs;
+ GList *entries;
+ MshvMsrEntry *entry;
+ uint32_t name;
+ struct hv_register_assoc *assoc;
+ int ret;
+ size_t i = 0;
+
+ n_msrs = g_list_length(msrs);
+ struct hv_register_assoc *assocs = g_new0(struct hv_register_assoc,
+ n_msrs);
+
+ entries = msrs;
+ for (const GList *elem = entries; elem != NULL; elem = elem->next) {
+ entry = elem->data;
+ ret = mshv_msr_to_hv_reg_name(entry->index, &name);
+ if (ret < 0) {
+ g_free(assocs);
+ return ret;
+ }
+ assoc = &assocs[i];
+ assoc->name = name;
+ /* the union has been initialized to 0 */
+ assoc->value.reg64 = entry->data;
+ i++;
+ }
+ ret = mshv_set_generic_regs(cpu, assocs, n_msrs);
+ g_free(assocs);
+ if (ret < 0) {
+ error_report("failed to set msrs");
+ return -1;
+ }
+ return 0;
+}
+
+int mshv_configure_msr(const CPUState *cpu, const MshvMsrEntry *msrs,
+ size_t n_msrs)
+{
+ GList *valid_msrs = NULL;
+ uint32_t msr_index;
+ int ret;
+
+ for (size_t i = 0; i < n_msrs; i++) {
+ msr_index = msrs[i].index;
+ /* check whether index of msrs is in SUPPORTED_MSRS */
+ if (mshv_is_supported_msr(msr_index)) {
+ valid_msrs = g_list_append(valid_msrs, (void *) &msrs[i]);
+ }
+ }
+
+ ret = set_msrs(cpu, valid_msrs);
+ g_list_free(valid_msrs);
+
+ return ret;
+}
diff --git a/accel/mshv/trace-events b/accel/mshv/trace-events
new file mode 100644
index 0000000..36f0d59
--- /dev/null
+++ b/accel/mshv/trace-events
@@ -0,0 +1,33 @@
+# Authors: Ziqiao Zhou <ziqiaozhou@microsoft.com>
+# Magnus Kulke <magnuskulke@microsoft.com>
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+mshv_start_vcpu_thread(const char* thread, uint32_t cpu) "thread=%s cpu_index=%d"
+
+mshv_set_memory(bool add, uint64_t gpa, uint64_t size, uint64_t user_addr, bool readonly, int ret) "add=%d gpa=0x%" PRIx64 " size=0x%" PRIx64 " user=0x%" PRIx64 " readonly=%d result=%d"
+mshv_mem_ioeventfd_add(uint64_t addr, uint32_t size, uint32_t data) "addr=0x%" PRIx64 " size=%d data=0x%x"
+mshv_mem_ioeventfd_del(uint64_t addr, uint32_t size, uint32_t data) "addr=0x%" PRIx64 " size=%d data=0x%x"
+
+mshv_hvcall_args(const char* hvcall, uint16_t code, uint16_t in_sz) "built args for '%s' code: %d in_sz: %d"
+
+mshv_handle_interrupt(uint32_t cpu, int mask) "cpu_index=%d mask=0x%x"
+mshv_set_msi_routing(uint32_t gsi, uint64_t addr, uint32_t data) "gsi=%d addr=0x%" PRIx64 " data=0x%x"
+mshv_remove_msi_routing(uint32_t gsi) "gsi=%d"
+mshv_add_msi_routing(uint64_t addr, uint32_t data) "addr=0x%" PRIx64 " data=0x%x"
+mshv_commit_msi_routing_table(int vm_fd, int len) "vm_fd=%d table_size=%d"
+mshv_register_irqfd(int vm_fd, int event_fd, uint32_t gsi) "vm_fd=%d event_fd=%d gsi=%d"
+mshv_irqchip_update_irqfd_notifier_gsi(int event_fd, int resample_fd, int virq, bool add) "event_fd=%d resample_fd=%d virq=%d add=%d"
+
+mshv_insn_fetch(uint64_t addr, size_t size) "gpa=0x%" PRIx64 " size=%zu"
+mshv_mem_write(uint64_t addr, size_t size) "\tgpa=0x%" PRIx64 " size=%zu"
+mshv_mem_read(uint64_t addr, size_t size) "\tgpa=0x%" PRIx64 " size=%zu"
+mshv_map_memory(uint64_t userspace_addr, uint64_t gpa, uint64_t size) "\tu_a=0x%" PRIx64 " gpa=0x%010" PRIx64 " size=0x%08" PRIx64
+mshv_unmap_memory(uint64_t userspace_addr, uint64_t gpa, uint64_t size) "\tu_a=0x%" PRIx64 " gpa=0x%010" PRIx64 " size=0x%08" PRIx64
+mshv_set_phys_mem(bool add, const char *name, uint64_t gpa) "\tadd=%d name=%s gpa=0x%010" PRIx64
+mshv_handle_mmio(uint64_t gva, uint64_t gpa, uint64_t size, uint8_t access_type) "\tgva=0x%" PRIx64 " gpa=0x%010" PRIx64 " size=0x%" PRIx64 " access_type=%d"
+
+mshv_found_slot(uint64_t userspace_addr, uint64_t gpa, uint64_t size) "\tu_a=0x%" PRIx64 " gpa=0x%010" PRIx64 " size=0x%08" PRIx64
+mshv_skip_unset_mem(uint64_t userspace_addr, uint64_t gpa, uint64_t size) "\tu_a=0x%" PRIx64 " gpa=0x%010" PRIx64 " size=0x%08" PRIx64
+mshv_remap_attempt(uint64_t userspace_addr, uint64_t gpa, uint64_t size) "\tu_a=0x%" PRIx64 " gpa=0x%010" PRIx64 " size=0x%08" PRIx64
+mshv_find_slot_by_gpa(uint64_t gpa) "\tgpa=0x%010" PRIx64
diff --git a/accel/mshv/trace.h b/accel/mshv/trace.h
new file mode 100644
index 0000000..0dca48f
--- /dev/null
+++ b/accel/mshv/trace.h
@@ -0,0 +1,14 @@
+/*
+ * QEMU MSHV support
+ *
+ * Copyright Microsoft, Corp. 2025
+ *
+ * Authors:
+ * Ziqiao Zhou <ziqiaozhou@microsoft.com>
+ * Magnus Kulke <magnuskulke@microsoft.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ */
+
+#include "trace/trace-accel_mshv.h"
diff --git a/accel/qtest/qtest.c b/accel/qtest/qtest.c
index 7fae80f..1d4337d 100644
--- a/accel/qtest/qtest.c
+++ b/accel/qtest/qtest.c
@@ -18,12 +18,14 @@
#include "qemu/option.h"
#include "qemu/config-file.h"
#include "qemu/accel.h"
-#include "system/accel-ops.h"
+#include "accel/accel-ops.h"
+#include "accel/accel-cpu-ops.h"
#include "system/qtest.h"
#include "system/cpus.h"
#include "qemu/guest-random.h"
#include "qemu/main-loop.h"
#include "hw/core/cpu.h"
+#include "accel/dummy-cpus.h"
static int64_t qtest_clock_counter;
@@ -37,12 +39,12 @@ static void qtest_set_virtual_clock(int64_t count)
qatomic_set_i64(&qtest_clock_counter, count);
}
-static int qtest_init_accel(MachineState *ms)
+static int qtest_init_accel(AccelState *as, MachineState *ms)
{
return 0;
}
-static void qtest_accel_class_init(ObjectClass *oc, void *data)
+static void qtest_accel_class_init(ObjectClass *oc, const void *data)
{
AccelClass *ac = ACCEL_CLASS(oc);
ac->name = "QTest";
@@ -59,13 +61,14 @@ static const TypeInfo qtest_accel_type = {
};
module_obj(TYPE_QTEST_ACCEL);
-static void qtest_accel_ops_class_init(ObjectClass *oc, void *data)
+static void qtest_accel_ops_class_init(ObjectClass *oc, const void *data)
{
AccelOpsClass *ops = ACCEL_OPS_CLASS(oc);
ops->create_vcpu_thread = dummy_start_vcpu_thread;
ops->get_virtual_clock = qtest_get_virtual_clock;
ops->set_virtual_clock = qtest_set_virtual_clock;
+ ops->handle_interrupt = generic_handle_interrupt;
};
static const TypeInfo qtest_accel_ops_type = {
diff --git a/accel/stubs/hvf-stub.c b/accel/stubs/hvf-stub.c
new file mode 100644
index 0000000..42eadc5
--- /dev/null
+++ b/accel/stubs/hvf-stub.c
@@ -0,0 +1,12 @@
+/*
+ * HVF stubs for QEMU
+ *
+ * Copyright (c) Linaro
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "system/hvf.h"
+
+bool hvf_allowed;
diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c
index ecfd763..68cd33b 100644
--- a/accel/stubs/kvm-stub.c
+++ b/accel/stubs/kvm-stub.c
@@ -29,10 +29,6 @@ void kvm_flush_coalesced_mmio_buffer(void)
{
}
-void kvm_cpu_synchronize_state(CPUState *cpu)
-{
-}
-
bool kvm_has_sync_mmu(void)
{
return false;
@@ -105,11 +101,6 @@ unsigned int kvm_get_free_memslots(void)
return 0;
}
-void kvm_init_cpu_signals(CPUState *cpu)
-{
- abort();
-}
-
bool kvm_arm_supports_user_irq(void)
{
return false;
diff --git a/accel/stubs/meson.build b/accel/stubs/meson.build
index 91a2d21..48eccd1 100644
--- a/accel/stubs/meson.build
+++ b/accel/stubs/meson.build
@@ -2,5 +2,9 @@ system_stubs_ss = ss.source_set()
system_stubs_ss.add(when: 'CONFIG_XEN', if_false: files('xen-stub.c'))
system_stubs_ss.add(when: 'CONFIG_KVM', if_false: files('kvm-stub.c'))
system_stubs_ss.add(when: 'CONFIG_TCG', if_false: files('tcg-stub.c'))
+system_stubs_ss.add(when: 'CONFIG_HVF', if_false: files('hvf-stub.c'))
+system_stubs_ss.add(when: 'CONFIG_NVMM', if_false: files('nvmm-stub.c'))
+system_stubs_ss.add(when: 'CONFIG_WHPX', if_false: files('whpx-stub.c'))
+system_stubs_ss.add(when: 'CONFIG_MSHV', if_false: files('mshv-stub.c'))
specific_ss.add_all(when: ['CONFIG_SYSTEM_ONLY'], if_true: system_stubs_ss)
diff --git a/accel/stubs/mshv-stub.c b/accel/stubs/mshv-stub.c
new file mode 100644
index 0000000..e499b19
--- /dev/null
+++ b/accel/stubs/mshv-stub.c
@@ -0,0 +1,44 @@
+/*
+ * QEMU MSHV stub
+ *
+ * Copyright Red Hat, Inc. 2025
+ *
+ * Author: Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "hw/pci/msi.h"
+#include "system/mshv.h"
+
+bool mshv_allowed;
+
+int mshv_irqchip_add_msi_route(int vector, PCIDevice *dev)
+{
+ return -ENOSYS;
+}
+
+void mshv_irqchip_release_virq(int virq)
+{
+}
+
+int mshv_irqchip_update_msi_route(int virq, MSIMessage msg, PCIDevice *dev)
+{
+ return -ENOSYS;
+}
+
+void mshv_irqchip_commit_routes(void)
+{
+}
+
+int mshv_irqchip_add_irqfd_notifier_gsi(const EventNotifier *n,
+ const EventNotifier *rn, int virq)
+{
+ return -ENOSYS;
+}
+
+int mshv_irqchip_remove_irqfd_notifier_gsi(const EventNotifier *n, int virq)
+{
+ return -ENOSYS;
+}
diff --git a/accel/stubs/nvmm-stub.c b/accel/stubs/nvmm-stub.c
new file mode 100644
index 0000000..ec14837
--- /dev/null
+++ b/accel/stubs/nvmm-stub.c
@@ -0,0 +1,12 @@
+/*
+ * NVMM stubs for QEMU
+ *
+ * Copyright (c) Linaro
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "system/nvmm.h"
+
+bool nvmm_allowed;
diff --git a/accel/stubs/tcg-stub.c b/accel/stubs/tcg-stub.c
index b2b9881..77055e3 100644
--- a/accel/stubs/tcg-stub.c
+++ b/accel/stubs/tcg-stub.c
@@ -11,15 +11,9 @@
*/
#include "qemu/osdep.h"
-#include "exec/tb-flush.h"
-#include "exec/exec-all.h"
+#include "exec/cpu-common.h"
G_NORETURN void cpu_loop_exit(CPUState *cpu)
{
g_assert_not_reached();
}
-
-G_NORETURN void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc)
-{
- g_assert_not_reached();
-}
diff --git a/accel/stubs/whpx-stub.c b/accel/stubs/whpx-stub.c
new file mode 100644
index 0000000..c564c89
--- /dev/null
+++ b/accel/stubs/whpx-stub.c
@@ -0,0 +1,12 @@
+/*
+ * WHPX stubs for QEMU
+ *
+ * Copyright (c) Linaro
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "system/whpx.h"
+
+bool whpx_allowed;
diff --git a/accel/tcg/atomic_common.c.inc b/accel/tcg/atomic_common.c.inc
index 6056598..bca93a0 100644
--- a/accel/tcg/atomic_common.c.inc
+++ b/accel/tcg/atomic_common.c.inc
@@ -122,5 +122,14 @@ GEN_ATOMIC_HELPERS(umax_fetch)
GEN_ATOMIC_HELPERS(xchg)
+#if HAVE_CMPXCHG128
+ATOMIC_HELPER(xchgo_be, Int128)
+ATOMIC_HELPER(xchgo_le, Int128)
+ATOMIC_HELPER(fetch_ando_be, Int128)
+ATOMIC_HELPER(fetch_ando_le, Int128)
+ATOMIC_HELPER(fetch_oro_be, Int128)
+ATOMIC_HELPER(fetch_oro_le, Int128)
+#endif
+
#undef ATOMIC_HELPER
#undef GEN_ATOMIC_HELPERS
diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h
index 89593b2..ae5203b 100644
--- a/accel/tcg/atomic_template.h
+++ b/accel/tcg/atomic_template.h
@@ -77,7 +77,7 @@
# define END _le
#endif
-ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, abi_ptr addr,
+ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, vaddr addr,
ABI_TYPE cmpv, ABI_TYPE newv,
MemOpIdx oi, uintptr_t retaddr)
{
@@ -100,15 +100,35 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, abi_ptr addr,
return ret;
}
-#if DATA_SIZE < 16
-ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, abi_ptr addr, ABI_TYPE val,
+ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, vaddr addr, ABI_TYPE val,
MemOpIdx oi, uintptr_t retaddr)
{
DATA_TYPE *haddr = atomic_mmu_lookup(env_cpu(env), addr, oi,
DATA_SIZE, retaddr);
DATA_TYPE ret;
+#if DATA_SIZE == 16
+ ret = atomic16_xchg(haddr, val);
+#else
ret = qatomic_xchg__nocheck(haddr, val);
+#endif
+ ATOMIC_MMU_CLEANUP;
+ atomic_trace_rmw_post(env, addr,
+ VALUE_LOW(ret),
+ VALUE_HIGH(ret),
+ VALUE_LOW(val),
+ VALUE_HIGH(val),
+ oi);
+ return ret;
+}
+
+#if DATA_SIZE == 16
+ABI_TYPE ATOMIC_NAME(fetch_and)(CPUArchState *env, vaddr addr, ABI_TYPE val,
+ MemOpIdx oi, uintptr_t retaddr)
+{
+ DATA_TYPE *haddr = atomic_mmu_lookup(env_cpu(env), addr, oi,
+ DATA_SIZE, retaddr);
+ DATA_TYPE ret = atomic16_fetch_and(haddr, val);
ATOMIC_MMU_CLEANUP;
atomic_trace_rmw_post(env, addr,
VALUE_LOW(ret),
@@ -119,8 +139,24 @@ ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, abi_ptr addr, ABI_TYPE val,
return ret;
}
+ABI_TYPE ATOMIC_NAME(fetch_or)(CPUArchState *env, vaddr addr, ABI_TYPE val,
+ MemOpIdx oi, uintptr_t retaddr)
+{
+ DATA_TYPE *haddr = atomic_mmu_lookup(env_cpu(env), addr, oi,
+ DATA_SIZE, retaddr);
+ DATA_TYPE ret = atomic16_fetch_or(haddr, val);
+ ATOMIC_MMU_CLEANUP;
+ atomic_trace_rmw_post(env, addr,
+ VALUE_LOW(ret),
+ VALUE_HIGH(ret),
+ VALUE_LOW(val),
+ VALUE_HIGH(val),
+ oi);
+ return ret;
+}
+#else
#define GEN_ATOMIC_HELPER(X) \
-ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, abi_ptr addr, \
+ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, vaddr addr, \
ABI_TYPE val, MemOpIdx oi, uintptr_t retaddr) \
{ \
DATA_TYPE *haddr, ret; \
@@ -156,7 +192,7 @@ GEN_ATOMIC_HELPER(xor_fetch)
* of CF_PARALLEL's value, we'll trace just a read and a write.
*/
#define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET) \
-ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, abi_ptr addr, \
+ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, vaddr addr, \
ABI_TYPE xval, MemOpIdx oi, uintptr_t retaddr) \
{ \
XDATA_TYPE *haddr, cmp, old, new, val = xval; \
@@ -188,7 +224,7 @@ GEN_ATOMIC_HELPER_FN(smax_fetch, MAX, SDATA_TYPE, new)
GEN_ATOMIC_HELPER_FN(umax_fetch, MAX, DATA_TYPE, new)
#undef GEN_ATOMIC_HELPER_FN
-#endif /* DATA SIZE < 16 */
+#endif /* DATA_SIZE == 16 */
#undef END
@@ -202,7 +238,7 @@ GEN_ATOMIC_HELPER_FN(umax_fetch, MAX, DATA_TYPE, new)
# define END _be
#endif
-ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, abi_ptr addr,
+ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, vaddr addr,
ABI_TYPE cmpv, ABI_TYPE newv,
MemOpIdx oi, uintptr_t retaddr)
{
@@ -225,15 +261,35 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, abi_ptr addr,
return BSWAP(ret);
}
-#if DATA_SIZE < 16
-ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, abi_ptr addr, ABI_TYPE val,
+ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, vaddr addr, ABI_TYPE val,
MemOpIdx oi, uintptr_t retaddr)
{
DATA_TYPE *haddr = atomic_mmu_lookup(env_cpu(env), addr, oi,
DATA_SIZE, retaddr);
ABI_TYPE ret;
+#if DATA_SIZE == 16
+ ret = atomic16_xchg(haddr, BSWAP(val));
+#else
ret = qatomic_xchg__nocheck(haddr, BSWAP(val));
+#endif
+ ATOMIC_MMU_CLEANUP;
+ atomic_trace_rmw_post(env, addr,
+ VALUE_LOW(ret),
+ VALUE_HIGH(ret),
+ VALUE_LOW(val),
+ VALUE_HIGH(val),
+ oi);
+ return BSWAP(ret);
+}
+
+#if DATA_SIZE == 16
+ABI_TYPE ATOMIC_NAME(fetch_and)(CPUArchState *env, vaddr addr, ABI_TYPE val,
+ MemOpIdx oi, uintptr_t retaddr)
+{
+ DATA_TYPE *haddr = atomic_mmu_lookup(env_cpu(env), addr, oi,
+ DATA_SIZE, retaddr);
+ DATA_TYPE ret = atomic16_fetch_and(haddr, BSWAP(val));
ATOMIC_MMU_CLEANUP;
atomic_trace_rmw_post(env, addr,
VALUE_LOW(ret),
@@ -244,8 +300,24 @@ ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, abi_ptr addr, ABI_TYPE val,
return BSWAP(ret);
}
+ABI_TYPE ATOMIC_NAME(fetch_or)(CPUArchState *env, vaddr addr, ABI_TYPE val,
+ MemOpIdx oi, uintptr_t retaddr)
+{
+ DATA_TYPE *haddr = atomic_mmu_lookup(env_cpu(env), addr, oi,
+ DATA_SIZE, retaddr);
+ DATA_TYPE ret = atomic16_fetch_or(haddr, BSWAP(val));
+ ATOMIC_MMU_CLEANUP;
+ atomic_trace_rmw_post(env, addr,
+ VALUE_LOW(ret),
+ VALUE_HIGH(ret),
+ VALUE_LOW(val),
+ VALUE_HIGH(val),
+ oi);
+ return BSWAP(ret);
+}
+#else
#define GEN_ATOMIC_HELPER(X) \
-ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, abi_ptr addr, \
+ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, vaddr addr, \
ABI_TYPE val, MemOpIdx oi, uintptr_t retaddr) \
{ \
DATA_TYPE *haddr, ret; \
@@ -278,7 +350,7 @@ GEN_ATOMIC_HELPER(xor_fetch)
* of CF_PARALLEL's value, we'll trace just a read and a write.
*/
#define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET) \
-ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, abi_ptr addr, \
+ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, vaddr addr, \
ABI_TYPE xval, MemOpIdx oi, uintptr_t retaddr) \
{ \
XDATA_TYPE *haddr, ldo, ldn, old, new, val = xval; \
@@ -317,7 +389,7 @@ GEN_ATOMIC_HELPER_FN(add_fetch, ADD, DATA_TYPE, new)
#undef ADD
#undef GEN_ATOMIC_HELPER_FN
-#endif /* DATA_SIZE < 16 */
+#endif /* DATA_SIZE == 16 */
#undef END
#endif /* DATA_SIZE > 1 */
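The template now routes the 16-byte xchg/fetch_and/fetch_or through dedicated atomic16_* primitives instead of the generic macro expansion. On hosts that only guarantee a 16-byte compare-and-swap, such primitives reduce to a CAS retry loop; a self-contained sketch of that reduction follows (this is not QEMU's atomic128.h, and it assumes a gcc/clang host with __int128 and 16-byte __atomic support, e.g. -mcx16 on x86-64, possibly via libatomic):

#include <stdio.h>

typedef unsigned __int128 u128;

/* fetch_and built from compare-and-swap: retry until no concurrent
 * writer changed *ptr between our load and our CAS. */
static u128 fetch_and_128(u128 *ptr, u128 val)
{
    u128 old = __atomic_load_n(ptr, __ATOMIC_RELAXED);

    while (!__atomic_compare_exchange_n(ptr, &old, old & val, false,
                                        __ATOMIC_SEQ_CST, __ATOMIC_RELAXED)) {
        /* 'old' now holds the current value; loop and retry. */
    }
    return old;   /* the value before the AND, as fetch_and requires */
}

int main(void)
{
    u128 x = ((u128)0xffffffffffffffffULL << 64) | 0x0123456789abcdefULL;
    u128 before = fetch_and_128(&x, ~(u128)0xff);

    printf("low64 before=%llx after=%llx\n",
           (unsigned long long)before, (unsigned long long)x);
    return 0;
}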
diff --git a/accel/tcg/backend-ldst.h b/accel/tcg/backend-ldst.h
new file mode 100644
index 0000000..9c3a407
--- /dev/null
+++ b/accel/tcg/backend-ldst.h
@@ -0,0 +1,41 @@
+/*
+ * Internal memory barrier helpers for QEMU (target agnostic)
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#ifndef ACCEL_TCG_BACKEND_LDST_H
+#define ACCEL_TCG_BACKEND_LDST_H
+
+#include "tcg-target-mo.h"
+
+/**
+ * tcg_req_mo:
+ * @guest_mo: Guest default memory order
+ * @type: TCGBar
+ *
+ * Filter @type to the barrier that is required for the guest
+ * memory ordering vs the host memory ordering. A non-zero
+ * result indicates that some barrier is required.
+ */
+#define tcg_req_mo(guest_mo, type) \
+ ((type) & guest_mo & ~TCG_TARGET_DEFAULT_MO)
+
+/**
+ * cpu_req_mo:
+ * @cpu: CPUState
+ * @type: TCGBar
+ *
+ * If tcg_req_mo indicates a barrier for @type is required
+ * for the guest memory model, issue a host memory barrier.
+ */
+#define cpu_req_mo(cpu, type) \
+ do { \
+ if (tcg_req_mo(cpu->cc->tcg_ops->guest_default_memory_order, type)) { \
+ smp_mb(); \
+ } \
+ } while (0)
+
+#endif
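The two macros are pure bit arithmetic: a barrier is issued only where the guest's ordering requirements exceed what the host already guarantees. A minimal standalone sketch of that filtering with stand-in constants (the real values are TCGBar bits from tcg/tcg.h and the per-backend TCG_TARGET_DEFAULT_MO):

#include <stdio.h>

#define MO_LD_LD  (1 << 0)
#define MO_ST_LD  (1 << 1)
#define MO_LD_ST  (1 << 2)
#define MO_ST_ST  (1 << 3)
#define MO_ALL    (MO_LD_LD | MO_ST_LD | MO_LD_ST | MO_ST_ST)

/* Barrier needed only where the guest is stricter than the host. */
static int req_mo(int guest_mo, int host_mo, int type)
{
    return type & guest_mo & ~host_mo;
}

int main(void)
{
    /* TSO-like host: everything ordered except store-load. */
    int host_strong = MO_ALL & ~MO_ST_LD;
    /* Weakly ordered host: nothing guaranteed. */
    int host_weak = 0;

    /* A load-load ordering required by a strongly ordered guest: */
    printf("strong host: %#x\n", req_mo(MO_ALL, host_strong, MO_LD_LD)); /* 0 */
    printf("weak host:   %#x\n", req_mo(MO_ALL, host_weak, MO_LD_LD));   /* 0x1 */
    return 0;
}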
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index ef3d967..7c20d9d 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -23,27 +23,29 @@
#include "qapi/type-helpers.h"
#include "hw/core/cpu.h"
#include "accel/tcg/cpu-ops.h"
+#include "accel/tcg/helper-retaddr.h"
#include "trace.h"
#include "disas/disas.h"
#include "exec/cpu-common.h"
+#include "exec/cpu-interrupt.h"
#include "exec/page-protection.h"
+#include "exec/mmap-lock.h"
#include "exec/translation-block.h"
#include "tcg/tcg.h"
#include "qemu/atomic.h"
#include "qemu/rcu.h"
#include "exec/log.h"
#include "qemu/main-loop.h"
-#include "exec/cpu-all.h"
-#include "system/cpu-timers.h"
+#include "exec/icount.h"
#include "exec/replay-core.h"
#include "system/tcg.h"
#include "exec/helper-proto-common.h"
+#include "tcg-accel-ops.h"
#include "tb-jmp-cache.h"
#include "tb-hash.h"
#include "tb-context.h"
#include "tb-internal.h"
#include "internal-common.h"
-#include "internal-target.h"
/* -icount align implementation. */
@@ -148,12 +150,9 @@ static void init_delay_params(SyncClocks *sc, const CPUState *cpu)
 #endif /* CONFIG_USER_ONLY */
struct tb_desc {
- vaddr pc;
- uint64_t cs_base;
+ TCGTBCPUState s;
CPUArchState *env;
tb_page_addr_t page_addr0;
- uint32_t flags;
- uint32_t cflags;
};
static bool tb_lookup_cmp(const void *p, const void *d)
@@ -161,11 +160,11 @@ static bool tb_lookup_cmp(const void *p, const void *d)
const TranslationBlock *tb = p;
const struct tb_desc *desc = d;
- if ((tb_cflags(tb) & CF_PCREL || tb->pc == desc->pc) &&
+ if ((tb_cflags(tb) & CF_PCREL || tb->pc == desc->s.pc) &&
tb_page_addr0(tb) == desc->page_addr0 &&
- tb->cs_base == desc->cs_base &&
- tb->flags == desc->flags &&
- tb_cflags(tb) == desc->cflags) {
+ tb->cs_base == desc->s.cs_base &&
+ tb->flags == desc->s.flags &&
+ tb_cflags(tb) == desc->s.cflags) {
/* check next page if needed */
tb_page_addr_t tb_phys_page1 = tb_page_addr1(tb);
if (tb_phys_page1 == -1) {
@@ -183,7 +182,7 @@ static bool tb_lookup_cmp(const void *p, const void *d)
* is different for the new TB. Therefore any exception raised
* here by the faulting lookup is not premature.
*/
- virt_page1 = TARGET_PAGE_ALIGN(desc->pc);
+ virt_page1 = TARGET_PAGE_ALIGN(desc->s.pc);
phys_page1 = get_page_addr_code(desc->env, virt_page1);
if (tb_phys_page1 == phys_page1) {
return true;
@@ -193,26 +192,21 @@ static bool tb_lookup_cmp(const void *p, const void *d)
return false;
}
-static TranslationBlock *tb_htable_lookup(CPUState *cpu, vaddr pc,
- uint64_t cs_base, uint32_t flags,
- uint32_t cflags)
+static TranslationBlock *tb_htable_lookup(CPUState *cpu, TCGTBCPUState s)
{
tb_page_addr_t phys_pc;
struct tb_desc desc;
uint32_t h;
+ desc.s = s;
desc.env = cpu_env(cpu);
- desc.cs_base = cs_base;
- desc.flags = flags;
- desc.cflags = cflags;
- desc.pc = pc;
- phys_pc = get_page_addr_code(desc.env, pc);
+ phys_pc = get_page_addr_code(desc.env, s.pc);
if (phys_pc == -1) {
return NULL;
}
desc.page_addr0 = phys_pc;
- h = tb_hash_func(phys_pc, (cflags & CF_PCREL ? 0 : pc),
- flags, cs_base, cflags);
+ h = tb_hash_func(phys_pc, (s.cflags & CF_PCREL ? 0 : s.pc),
+ s.flags, s.cs_base, s.cflags);
return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
}
@@ -230,35 +224,33 @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, vaddr pc,
*
* Returns: an existing translation block or NULL.
*/
-static inline TranslationBlock *tb_lookup(CPUState *cpu, vaddr pc,
- uint64_t cs_base, uint32_t flags,
- uint32_t cflags)
+static inline TranslationBlock *tb_lookup(CPUState *cpu, TCGTBCPUState s)
{
TranslationBlock *tb;
CPUJumpCache *jc;
uint32_t hash;
/* we should never be trying to look up an INVALID tb */
- tcg_debug_assert(!(cflags & CF_INVALID));
+ tcg_debug_assert(!(s.cflags & CF_INVALID));
- hash = tb_jmp_cache_hash_func(pc);
+ hash = tb_jmp_cache_hash_func(s.pc);
jc = cpu->tb_jmp_cache;
tb = qatomic_read(&jc->array[hash].tb);
if (likely(tb &&
- jc->array[hash].pc == pc &&
- tb->cs_base == cs_base &&
- tb->flags == flags &&
- tb_cflags(tb) == cflags)) {
+ jc->array[hash].pc == s.pc &&
+ tb->cs_base == s.cs_base &&
+ tb->flags == s.flags &&
+ tb_cflags(tb) == s.cflags)) {
goto hit;
}
- tb = tb_htable_lookup(cpu, pc, cs_base, flags, cflags);
+ tb = tb_htable_lookup(cpu, s);
if (tb == NULL) {
return NULL;
}
- jc->array[hash].pc = pc;
+ jc->array[hash].pc = s.pc;
qatomic_set(&jc->array[hash].tb, tb);
hit:
@@ -266,7 +258,7 @@ hit:
* As long as tb is not NULL, the contents are consistent. Therefore,
* the virtual PC has to match for non-CF_PCREL translations.
*/
- assert((tb_cflags(tb) & CF_PCREL) || tb->pc == pc);
+ assert((tb_cflags(tb) & CF_PCREL) || tb->pc == s.pc);
return tb;
}
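The pc/cs_base/flags/cflags quartet that previously travelled through tb_htable_lookup and tb_lookup as separate parameters is now carried as a single TCGTBCPUState value. A toy illustration of what the consolidation buys (field names follow the diff; everything else is simplified and not QEMU's actual type):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
    uint64_t pc;        /* stand-in for vaddr */
    uint64_t cs_base;
    uint32_t flags;
    uint32_t cflags;
} TBKey;

/* One value type: the whole lookup key copies, hashes and compares
 * as a unit instead of as four loose scalars. */
static bool tb_key_equal(const TBKey *a, const TBKey *b)
{
    return a->pc == b->pc && a->cs_base == b->cs_base &&
           a->flags == b->flags && a->cflags == b->cflags;
}

int main(void)
{
    TBKey a = { 0x1000, 0, 0, 0 };
    TBKey b = a;                    /* struct copy replaces 4 assignments */

    printf("%d\n", tb_key_equal(&a, &b));   /* 1 */
    return 0;
}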
@@ -283,14 +275,11 @@ static void log_cpu_exec(vaddr pc, CPUState *cpu,
if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
FILE *logfile = qemu_log_trylock();
if (logfile) {
- int flags = 0;
+ int flags = CPU_DUMP_CCOP;
if (qemu_loglevel_mask(CPU_LOG_TB_FPU)) {
flags |= CPU_DUMP_FPU;
}
-#if defined(TARGET_I386)
- flags |= CPU_DUMP_CCOP;
-#endif
if (qemu_loglevel_mask(CPU_LOG_TB_VPU)) {
flags |= CPU_DUMP_VPU;
}
@@ -386,9 +375,6 @@ const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
{
CPUState *cpu = env_cpu(env);
TranslationBlock *tb;
- vaddr pc;
- uint64_t cs_base;
- uint32_t flags, cflags;
/*
* By definition we've just finished a TB, so I/O is OK.
@@ -398,20 +384,21 @@ const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
* The next TB, if we chain to it, will clear the flag again.
*/
cpu->neg.can_do_io = true;
- cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
- cflags = curr_cflags(cpu);
- if (check_for_breakpoints(cpu, pc, &cflags)) {
+ TCGTBCPUState s = cpu->cc->tcg_ops->get_tb_cpu_state(cpu);
+ s.cflags = curr_cflags(cpu);
+
+ if (check_for_breakpoints(cpu, s.pc, &s.cflags)) {
cpu_loop_exit(cpu);
}
- tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
+ tb = tb_lookup(cpu, s);
if (tb == NULL) {
return tcg_code_gen_epilogue;
}
if (qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC)) {
- log_cpu_exec(pc, cpu, tb);
+ log_cpu_exec(s.pc, cpu, tb);
}
return tb->tc.ptr;
@@ -561,11 +548,7 @@ static void cpu_exec_longjmp_cleanup(CPUState *cpu)
void cpu_exec_step_atomic(CPUState *cpu)
{
- CPUArchState *env = cpu_env(cpu);
TranslationBlock *tb;
- vaddr pc;
- uint64_t cs_base;
- uint32_t flags, cflags;
int tb_exit;
if (sigsetjmp(cpu->jmp_env, 0) == 0) {
@@ -574,13 +557,13 @@ void cpu_exec_step_atomic(CPUState *cpu)
g_assert(!cpu->running);
cpu->running = true;
- cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
+ TCGTBCPUState s = cpu->cc->tcg_ops->get_tb_cpu_state(cpu);
+ s.cflags = curr_cflags(cpu);
- cflags = curr_cflags(cpu);
/* Execute in a serial context. */
- cflags &= ~CF_PARALLEL;
+ s.cflags &= ~CF_PARALLEL;
/* After 1 insn, return and release the exclusive lock. */
- cflags |= CF_NO_GOTO_TB | CF_NO_GOTO_PTR | 1;
+ s.cflags |= CF_NO_GOTO_TB | CF_NO_GOTO_PTR | 1;
/*
* No need to check_for_breakpoints here.
* We only arrive in cpu_exec_step_atomic after beginning execution
@@ -588,16 +571,16 @@ void cpu_exec_step_atomic(CPUState *cpu)
* Any breakpoint for this insn will have been recognized earlier.
*/
- tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
+ tb = tb_lookup(cpu, s);
if (tb == NULL) {
mmap_lock();
- tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
+ tb = tb_gen_code(cpu, s);
mmap_unlock();
}
cpu_exec_enter(cpu);
/* execute the generated code */
- trace_exec_tb(tb, pc);
+ trace_exec_tb(tb, s.pc);
cpu_tb_exec(cpu, tb, &tb_exit);
cpu_exec_exit(cpu);
} else {
@@ -665,7 +648,6 @@ static inline void tb_add_jump(TranslationBlock *tb, int n,
out_unlock_next:
qemu_spin_unlock(&tb_next->jmp_lock);
- return;
}
static inline bool cpu_handle_halt(CPUState *cpu)
@@ -731,10 +713,10 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
* If user mode only, we simulate a fake exception which will be
* handled outside the cpu execution loop.
*/
-#if defined(TARGET_I386)
const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
- tcg_ops->fake_user_interrupt(cpu);
-#endif /* TARGET_I386 */
+ if (tcg_ops->fake_user_interrupt) {
+ tcg_ops->fake_user_interrupt(cpu);
+ }
*ret = cpu->exception_index;
cpu->exception_index = -1;
return true;
@@ -767,6 +749,22 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
return false;
}
+void tcg_kick_vcpu_thread(CPUState *cpu)
+{
+#ifndef CONFIG_USER_ONLY
+ /*
+ * Ensure cpu_exec will see the reason why the exit request was set.
+ * FIXME: this is not always needed. Other accelerators instead
+ * read interrupt_request and set exit_request on demand from the
+ * CPU thread; see kvm_arch_pre_run() for example.
+ */
+ qatomic_store_release(&cpu->exit_request, true);
+#endif
+
+ /* Ensure cpu_exec will see the exit request after TCG has exited. */
+ qatomic_store_release(&cpu->neg.icount_decr.u16.high, -1);
+}
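The kick pairs a store-release of exit_request with the load-acquire added to cpu_handle_interrupt below, so whatever was written before the kick (the reason for the exit) is guaranteed visible once the flag is observed. A self-contained C11 model of that publish/consume pairing (plain atomics, not QEMU's qatomic_* wrappers):

#include <stdatomic.h>
#include <stdbool.h>

static int interrupt_reason;             /* plain data, published below */
static atomic_bool exit_request;

void kick(int reason)
{
    interrupt_reason = reason;           /* 1: write the payload */
    atomic_store_explicit(&exit_request, true,
                          memory_order_release);   /* 2: publish */
}

bool poll_exit(int *reason)
{
    if (atomic_load_explicit(&exit_request, memory_order_acquire)) {
        *reason = interrupt_reason;      /* guaranteed to see step 1 */
        return true;
    }
    return false;
}

int main(void)
{
    int r;
    kick(42);
    return poll_exit(&r) && r == 42 ? 0 : 1;
}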
+
static inline bool icount_exit_request(CPUState *cpu)
{
if (!icount_enabled()) {
@@ -793,61 +791,53 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
/* Clear the interrupt flag now since we're processing
* cpu->interrupt_request and cpu->exit_request.
* Ensure zeroing happens before reading cpu->exit_request or
- * cpu->interrupt_request (see also smp_wmb in cpu_exit())
+ * cpu->interrupt_request (see also store-release in
+ * tcg_kick_vcpu_thread())
*/
qatomic_set_mb(&cpu->neg.icount_decr.u16.high, 0);
- if (unlikely(qatomic_read(&cpu->interrupt_request))) {
- int interrupt_request;
+#ifdef CONFIG_USER_ONLY
+ assert(!cpu_test_interrupt(cpu, ~0));
+#else
+ if (unlikely(cpu_test_interrupt(cpu, ~0))) {
bql_lock();
- interrupt_request = cpu->interrupt_request;
- if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) {
- /* Mask out external interrupts for this step. */
- interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK;
- }
- if (interrupt_request & CPU_INTERRUPT_DEBUG) {
- cpu->interrupt_request &= ~CPU_INTERRUPT_DEBUG;
+ if (cpu_test_interrupt(cpu, CPU_INTERRUPT_DEBUG)) {
+ cpu_reset_interrupt(cpu, CPU_INTERRUPT_DEBUG);
cpu->exception_index = EXCP_DEBUG;
bql_unlock();
return true;
}
-#if !defined(CONFIG_USER_ONLY)
if (replay_mode == REPLAY_MODE_PLAY && !replay_has_interrupt()) {
/* Do nothing */
- } else if (interrupt_request & CPU_INTERRUPT_HALT) {
+ } else if (cpu_test_interrupt(cpu, CPU_INTERRUPT_HALT)) {
replay_interrupt();
- cpu->interrupt_request &= ~CPU_INTERRUPT_HALT;
+ cpu_reset_interrupt(cpu, CPU_INTERRUPT_HALT);
cpu->halted = 1;
cpu->exception_index = EXCP_HLT;
bql_unlock();
return true;
- }
-#if defined(TARGET_I386)
- else if (interrupt_request & CPU_INTERRUPT_INIT) {
- X86CPU *x86_cpu = X86_CPU(cpu);
- CPUArchState *env = &x86_cpu->env;
- replay_interrupt();
- cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0, 0);
- do_cpu_init(x86_cpu);
- cpu->exception_index = EXCP_HALTED;
- bql_unlock();
- return true;
- }
-#else
- else if (interrupt_request & CPU_INTERRUPT_RESET) {
- replay_interrupt();
- cpu_reset(cpu);
- bql_unlock();
- return true;
- }
-#endif /* !TARGET_I386 */
- /* The target hook has 3 exit conditions:
- False when the interrupt isn't processed,
- True when it is, and we should restart on a new TB,
- and via longjmp via cpu_loop_exit. */
- else {
+ } else {
const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
+ int interrupt_request = cpu->interrupt_request;
+
+ if (cpu_test_interrupt(cpu, CPU_INTERRUPT_RESET)) {
+ replay_interrupt();
+ tcg_ops->cpu_exec_reset(cpu);
+ bql_unlock();
+ return true;
+ }
+ if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) {
+ /* Mask out external interrupts for this step. */
+ interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK;
+ }
+
+ /*
+ * The target hook has 3 exit conditions:
+ * False when the interrupt isn't processed,
+ * True when it is, and we should restart on a new TB,
+ * and exiting via longjmp through cpu_loop_exit.
+ */
if (tcg_ops->cpu_exec_interrupt(cpu, interrupt_request)) {
if (!tcg_ops->need_replay_interrupt ||
tcg_ops->need_replay_interrupt(interrupt_request)) {
@@ -866,13 +856,9 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
cpu->exception_index = -1;
*last_tb = NULL;
}
- /* The target hook may have updated the 'cpu->interrupt_request';
- * reload the 'interrupt_request' value */
- interrupt_request = cpu->interrupt_request;
}
-#endif /* !CONFIG_USER_ONLY */
- if (interrupt_request & CPU_INTERRUPT_EXITTB) {
- cpu->interrupt_request &= ~CPU_INTERRUPT_EXITTB;
+ if (cpu_test_interrupt(cpu, CPU_INTERRUPT_EXITTB)) {
+ cpu_reset_interrupt(cpu, CPU_INTERRUPT_EXITTB);
/* ensure that no TB jump will be modified as
the program flow was changed */
*last_tb = NULL;
@@ -881,10 +867,13 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
/* If we exit via cpu_loop_exit/longjmp it is reset in cpu_exec */
bql_unlock();
}
+#endif /* !CONFIG_USER_ONLY */
- /* Finally, check if we need to exit to the main loop. */
- if (unlikely(qatomic_read(&cpu->exit_request)) || icount_exit_request(cpu)) {
- qatomic_set(&cpu->exit_request, 0);
+ /*
+ * Finally, check if we need to exit to the main loop.
+ * The corresponding store-release is in cpu_exit.
+ */
+ if (unlikely(qatomic_load_acquire(&cpu->exit_request)) || icount_exit_request(cpu)) {
if (cpu->exception_index == -1) {
cpu->exception_index = EXCP_INTERRUPT;
}
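cpu_test_interrupt()/cpu_reset_interrupt() replace the open-coded reads and "&= ~mask" updates of cpu->interrupt_request throughout this function. A rough standalone model of the helper pattern using C11 atomics (the real helpers also coordinate with the BQL; this shows only the mask arithmetic):

#include <stdatomic.h>
#include <stdbool.h>

static atomic_uint interrupt_request;

static bool test_interrupt(unsigned mask)
{
    return (atomic_load_explicit(&interrupt_request,
                                 memory_order_acquire) & mask) != 0;
}

static void reset_interrupt(unsigned mask)
{
    atomic_fetch_and_explicit(&interrupt_request, ~mask,
                              memory_order_release);
}

int main(void)
{
    enum { INT_DEBUG = 1u << 3 };

    atomic_fetch_or_explicit(&interrupt_request, INT_DEBUG,
                             memory_order_release);
    if (test_interrupt(INT_DEBUG)) {
        reset_interrupt(INT_DEBUG);    /* clear just this source */
    }
    return test_interrupt(~0u);        /* 0: nothing left pending */
}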
@@ -954,11 +943,8 @@ cpu_exec_loop(CPUState *cpu, SyncClocks *sc)
while (!cpu_handle_interrupt(cpu, &last_tb)) {
TranslationBlock *tb;
- vaddr pc;
- uint64_t cs_base;
- uint32_t flags, cflags;
-
- cpu_get_tb_cpu_state(cpu_env(cpu), &pc, &cs_base, &flags);
+ TCGTBCPUState s = cpu->cc->tcg_ops->get_tb_cpu_state(cpu);
+ s.cflags = cpu->cflags_next_tb;
/*
* When requested, use an exact setting for cflags for the next
@@ -967,33 +953,32 @@ cpu_exec_loop(CPUState *cpu, SyncClocks *sc)
* have CF_INVALID set, -1 is a convenient invalid value that
* does not require tcg headers for cpu_common_reset.
*/
- cflags = cpu->cflags_next_tb;
- if (cflags == -1) {
- cflags = curr_cflags(cpu);
+ if (s.cflags == -1) {
+ s.cflags = curr_cflags(cpu);
} else {
cpu->cflags_next_tb = -1;
}
- if (check_for_breakpoints(cpu, pc, &cflags)) {
+ if (check_for_breakpoints(cpu, s.pc, &s.cflags)) {
break;
}
- tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
+ tb = tb_lookup(cpu, s);
if (tb == NULL) {
CPUJumpCache *jc;
uint32_t h;
mmap_lock();
- tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
+ tb = tb_gen_code(cpu, s);
mmap_unlock();
/*
* We add the TB in the virtual pc hash table
* for the fast lookup
*/
- h = tb_jmp_cache_hash_func(pc);
+ h = tb_jmp_cache_hash_func(s.pc);
jc = cpu->tb_jmp_cache;
- jc->array[h].pc = pc;
+ jc->array[h].pc = s.pc;
qatomic_set(&jc->array[h].tb, tb);
}
@@ -1013,7 +998,7 @@ cpu_exec_loop(CPUState *cpu, SyncClocks *sc)
tb_add_jump(last_tb, tb_exit, tb);
}
- cpu_loop_exec_tb(cpu, tb, pc, &last_tb, &tb_exit);
+ cpu_loop_exec_tb(cpu, tb, s.pc, &last_tb, &tb_exit);
/* Try to align the host and virtual clocks
if the guest is in advance */
@@ -1072,8 +1057,12 @@ bool tcg_exec_realizefn(CPUState *cpu, Error **errp)
#ifndef CONFIG_USER_ONLY
assert(tcg_ops->cpu_exec_halt);
assert(tcg_ops->cpu_exec_interrupt);
+ assert(tcg_ops->cpu_exec_reset);
+ assert(tcg_ops->pointer_wrap);
#endif /* !CONFIG_USER_ONLY */
assert(tcg_ops->translate_code);
+ assert(tcg_ops->get_tb_cpu_state);
+ assert(tcg_ops->mmu_index);
tcg_ops->initialize();
tcg_target_initialized = true;
}
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index fb22048..631f1fe 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -19,15 +19,18 @@
#include "qemu/osdep.h"
#include "qemu/main-loop.h"
+#include "qemu/target-info.h"
#include "accel/tcg/cpu-ops.h"
-#include "exec/exec-all.h"
+#include "accel/tcg/iommu.h"
+#include "accel/tcg/probe.h"
#include "exec/page-protection.h"
-#include "exec/memory.h"
-#include "exec/cpu_ldst.h"
+#include "system/memory.h"
+#include "system/physmem.h"
+#include "accel/tcg/cpu-ldst-common.h"
+#include "accel/tcg/cpu-mmu-index.h"
#include "exec/cputlb.h"
#include "exec/tb-flush.h"
-#include "exec/memory-internal.h"
-#include "exec/ram_addr.h"
+#include "system/ram_addr.h"
#include "exec/mmu-access-type.h"
#include "exec/tlb-common.h"
#include "exec/vaddr.h"
@@ -35,18 +38,21 @@
#include "qemu/error-report.h"
#include "exec/log.h"
#include "exec/helper-proto-common.h"
+#include "exec/tlb-flags.h"
#include "qemu/atomic.h"
#include "qemu/atomic128.h"
#include "tb-internal.h"
#include "trace.h"
#include "tb-hash.h"
#include "tb-internal.h"
+#include "tlb-bounds.h"
#include "internal-common.h"
-#include "internal-target.h"
#ifdef CONFIG_PLUGIN
#include "qemu/plugin-memory.h"
#endif
#include "tcg/tcg-ldst.h"
+#include "backend-ldst.h"
+
/* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
/* #define DEBUG_TLB */
@@ -84,9 +90,6 @@
*/
QEMU_BUILD_BUG_ON(sizeof(vaddr) > sizeof(run_on_cpu_data));
-/* We currently can't handle more than 16 bits in the MMUIDX bitmask.
- */
-QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
static inline size_t tlb_n_entries(CPUTLBDescFast *fast)
@@ -124,7 +127,7 @@ static inline uint64_t tlb_addr_write(const CPUTLBEntry *entry)
static inline uintptr_t tlb_index(CPUState *cpu, uintptr_t mmu_idx,
vaddr addr)
{
- uintptr_t size_mask = cpu->neg.tlb.f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS;
+ uintptr_t size_mask = cpu_tlb_fast(cpu, mmu_idx)->mask >> CPU_TLB_ENTRY_BITS;
return (addr >> TARGET_PAGE_BITS) & size_mask;
}
@@ -133,7 +136,7 @@ static inline uintptr_t tlb_index(CPUState *cpu, uintptr_t mmu_idx,
static inline CPUTLBEntry *tlb_entry(CPUState *cpu, uintptr_t mmu_idx,
vaddr addr)
{
- return &cpu->neg.tlb.f[mmu_idx].table[tlb_index(cpu, mmu_idx, addr)];
+ return &cpu_tlb_fast(cpu, mmu_idx)->table[tlb_index(cpu, mmu_idx, addr)];
}
static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
@@ -287,7 +290,7 @@ static void tlb_flush_one_mmuidx_locked(CPUState *cpu, int mmu_idx,
int64_t now)
{
CPUTLBDesc *desc = &cpu->neg.tlb.d[mmu_idx];
- CPUTLBDescFast *fast = &cpu->neg.tlb.f[mmu_idx];
+ CPUTLBDescFast *fast = cpu_tlb_fast(cpu, mmu_idx);
tlb_mmu_resize_locked(desc, fast, now);
tlb_mmu_flush_locked(desc, fast);
@@ -326,7 +329,7 @@ void tlb_init(CPUState *cpu)
cpu->neg.tlb.c.dirty = 0;
for (i = 0; i < NB_MMU_MODES; i++) {
- tlb_mmu_init(&cpu->neg.tlb.d[i], &cpu->neg.tlb.f[i], now);
+ tlb_mmu_init(&cpu->neg.tlb.d[i], cpu_tlb_fast(cpu, i), now);
}
}
@@ -337,7 +340,7 @@ void tlb_destroy(CPUState *cpu)
qemu_spin_destroy(&cpu->neg.tlb.c.lock);
for (i = 0; i < NB_MMU_MODES; i++) {
CPUTLBDesc *desc = &cpu->neg.tlb.d[i];
- CPUTLBDescFast *fast = &cpu->neg.tlb.f[i];
+ CPUTLBDescFast *fast = cpu_tlb_fast(cpu, i);
g_free(fast->table);
g_free(desc->fulltlb);
@@ -365,8 +368,8 @@ static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
{
- uint16_t asked = data.host_int;
- uint16_t all_dirty, work, to_clean;
+ MMUIdxMap asked = data.host_int;
+ MMUIdxMap all_dirty, work, to_clean;
int64_t now = get_clock_realtime();
assert_cpu_is_self(cpu);
@@ -403,7 +406,7 @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
}
}
-void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
+void tlb_flush_by_mmuidx(CPUState *cpu, MMUIdxMap idxmap)
{
tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
@@ -417,7 +420,7 @@ void tlb_flush(CPUState *cpu)
tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
}
-void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
+void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, MMUIdxMap idxmap)
{
const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
@@ -526,7 +529,7 @@ static void tlb_flush_page_locked(CPUState *cpu, int midx, vaddr page)
*/
static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
vaddr addr,
- uint16_t idxmap)
+ MMUIdxMap idxmap)
{
int mmu_idx;
@@ -565,14 +568,14 @@ static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu,
{
vaddr addr_and_idxmap = data.target_ptr;
vaddr addr = addr_and_idxmap & TARGET_PAGE_MASK;
- uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK;
+ MMUIdxMap idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK;
tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
}
typedef struct {
vaddr addr;
- uint16_t idxmap;
+ MMUIdxMap idxmap;
} TLBFlushPageByMMUIdxData;
/**
@@ -594,7 +597,7 @@ static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu,
g_free(d);
}
-void tlb_flush_page_by_mmuidx(CPUState *cpu, vaddr addr, uint16_t idxmap)
+void tlb_flush_page_by_mmuidx(CPUState *cpu, vaddr addr, MMUIdxMap idxmap)
{
tlb_debug("addr: %016" VADDR_PRIx " mmu_idx:%" PRIx16 "\n", addr, idxmap);
@@ -613,7 +616,7 @@ void tlb_flush_page(CPUState *cpu, vaddr addr)
void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
vaddr addr,
- uint16_t idxmap)
+ MMUIdxMap idxmap)
{
tlb_debug("addr: %016" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap);
@@ -662,7 +665,7 @@ static void tlb_flush_range_locked(CPUState *cpu, int midx,
unsigned bits)
{
CPUTLBDesc *d = &cpu->neg.tlb.d[midx];
- CPUTLBDescFast *f = &cpu->neg.tlb.f[midx];
+ CPUTLBDescFast *f = cpu_tlb_fast(cpu, midx);
vaddr mask = MAKE_64BIT_MASK(0, bits);
/*
@@ -710,8 +713,8 @@ static void tlb_flush_range_locked(CPUState *cpu, int midx,
typedef struct {
vaddr addr;
vaddr len;
- uint16_t idxmap;
- uint16_t bits;
+ MMUIdxMap idxmap;
+ unsigned bits;
} TLBFlushRangeData;
static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu,
@@ -761,26 +764,26 @@ static void tlb_flush_range_by_mmuidx_async_1(CPUState *cpu,
}
void tlb_flush_range_by_mmuidx(CPUState *cpu, vaddr addr,
- vaddr len, uint16_t idxmap,
+ vaddr len, MMUIdxMap idxmap,
unsigned bits)
{
TLBFlushRangeData d;
assert_cpu_is_self(cpu);
+ /* If no page bits are significant, this devolves to tlb_flush. */
+ if (bits < TARGET_PAGE_BITS) {
+ tlb_flush_by_mmuidx(cpu, idxmap);
+ return;
+ }
/*
* If all bits are significant, and len is small,
* this devolves to tlb_flush_page.
*/
- if (bits >= TARGET_LONG_BITS && len <= TARGET_PAGE_SIZE) {
+ if (len <= TARGET_PAGE_SIZE && bits >= target_long_bits()) {
tlb_flush_page_by_mmuidx(cpu, addr, idxmap);
return;
}
- /* If no page bits are significant, this devolves to tlb_flush. */
- if (bits < TARGET_PAGE_BITS) {
- tlb_flush_by_mmuidx(cpu, idxmap);
- return;
- }
/* This should already be page aligned */
d.addr = addr & TARGET_PAGE_MASK;
@@ -792,7 +795,7 @@ void tlb_flush_range_by_mmuidx(CPUState *cpu, vaddr addr,
}
void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, vaddr addr,
- uint16_t idxmap, unsigned bits)
+ MMUIdxMap idxmap, unsigned bits)
{
tlb_flush_range_by_mmuidx(cpu, addr, TARGET_PAGE_SIZE, idxmap, bits);
}
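The hunk reorders the two shortcut checks so the "no significant page bits" case, which must flush everything, is decided before the single-page shortcut is considered. The decision structure, reduced to a standalone sketch with stand-in constants:

#include <stdio.h>

#define PAGE_BITS 12
#define PAGE_SIZE (1u << PAGE_BITS)

typedef enum { FLUSH_ALL, FLUSH_ONE_PAGE, FLUSH_RANGE } FlushKind;

static FlushKind classify(unsigned long len, unsigned bits,
                          unsigned target_long_bits)
{
    if (bits < PAGE_BITS) {
        return FLUSH_ALL;        /* mask covers no page bits */
    }
    if (len <= PAGE_SIZE && bits >= target_long_bits) {
        return FLUSH_ONE_PAGE;   /* all bits significant, small range */
    }
    return FLUSH_RANGE;
}

int main(void)
{
    printf("%d\n", classify(PAGE_SIZE, 8, 64));   /* FLUSH_ALL */
    printf("%d\n", classify(PAGE_SIZE, 64, 64));  /* FLUSH_ONE_PAGE */
    printf("%d\n", classify(1u << 20, 64, 64));   /* FLUSH_RANGE */
    return 0;
}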
@@ -800,25 +803,25 @@ void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, vaddr addr,
void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
vaddr addr,
vaddr len,
- uint16_t idxmap,
+ MMUIdxMap idxmap,
unsigned bits)
{
TLBFlushRangeData d, *p;
CPUState *dst_cpu;
+ /* If no page bits are significant, this devolves to tlb_flush. */
+ if (bits < TARGET_PAGE_BITS) {
+ tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, idxmap);
+ return;
+ }
/*
* If all bits are significant, and len is small,
* this devolves to tlb_flush_page.
*/
- if (bits >= TARGET_LONG_BITS && len <= TARGET_PAGE_SIZE) {
+ if (len <= TARGET_PAGE_SIZE && bits >= target_long_bits()) {
tlb_flush_page_by_mmuidx_all_cpus_synced(src_cpu, addr, idxmap);
return;
}
- /* If no page bits are significant, this devolves to tlb_flush. */
- if (bits < TARGET_PAGE_BITS) {
- tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, idxmap);
- return;
- }
/* This should already be page aligned */
d.addr = addr & TARGET_PAGE_MASK;
@@ -842,7 +845,7 @@ void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
vaddr addr,
- uint16_t idxmap,
+ MMUIdxMap idxmap,
unsigned bits)
{
tlb_flush_range_by_mmuidx_all_cpus_synced(src_cpu, addr, TARGET_PAGE_SIZE,
@@ -853,7 +856,7 @@ void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
can be detected */
void tlb_protect_code(ram_addr_t ram_addr)
{
- cpu_physical_memory_test_and_clear_dirty(ram_addr & TARGET_PAGE_MASK,
+ physical_memory_test_and_clear_dirty(ram_addr & TARGET_PAGE_MASK,
TARGET_PAGE_SIZE,
DIRTY_MEMORY_CODE);
}
@@ -862,7 +865,7 @@ void tlb_protect_code(ram_addr_t ram_addr)
tested for self modifying code */
void tlb_unprotect_code(ram_addr_t ram_addr)
{
- cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
+ physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
}
@@ -882,18 +885,17 @@ void tlb_unprotect_code(ram_addr_t ram_addr)
*
* Called with tlb_c.lock held.
*/
-static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
+static void tlb_reset_dirty_range_locked(CPUTLBEntryFull *full, CPUTLBEntry *ent,
uintptr_t start, uintptr_t length)
{
- uintptr_t addr = tlb_entry->addr_write;
+ const uintptr_t addr = ent->addr_write;
+ int flags = addr | full->slow_flags[MMU_DATA_STORE];
- if ((addr & (TLB_INVALID_MASK | TLB_MMIO |
- TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) {
- addr &= TARGET_PAGE_MASK;
- addr += tlb_entry->addend;
- if ((addr - start) < length) {
- qatomic_set(&tlb_entry->addr_write,
- tlb_entry->addr_write | TLB_NOTDIRTY);
+ flags &= TLB_INVALID_MASK | TLB_MMIO | TLB_DISCARD_WRITE | TLB_NOTDIRTY;
+ if (flags == 0) {
+ uintptr_t host = (addr & TARGET_PAGE_MASK) + ent->addend;
+ if ((host - start) < length) {
+ qatomic_set(&ent->addr_write, addr | TLB_NOTDIRTY);
}
}
}
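The rewrite folds the low bits of the entry's addr_write together with the per-entry slow_flags before masking, so a watchpoint-free, non-MMIO, still-clean RAM mapping is recognised in one comparison. The flag arithmetic in isolation (bit values here are stand-ins, not the real TLB_* encoding):

#include <stdio.h>

#define TLB_INVALID       (1 << 0)
#define TLB_MMIO          (1 << 1)
#define TLB_DISCARD_WRITE (1 << 2)
#define TLB_NOTDIRTY      (1 << 3)
#define EXCLUDE_MASK (TLB_INVALID | TLB_MMIO | TLB_DISCARD_WRITE | TLB_NOTDIRTY)

/* Returns 1 when the entry is a plain writable RAM mapping, i.e. the
 * only kind that may need TLB_NOTDIRTY set during a dirty-bit reset. */
static int needs_notdirty(unsigned long addr_write, int slow_flags)
{
    int flags = (addr_write | slow_flags) & EXCLUDE_MASK;
    return flags == 0;
}

int main(void)
{
    printf("%d\n", needs_notdirty(0x7f000000, 0));            /* 1 */
    printf("%d\n", needs_notdirty(0x7f000000 | TLB_MMIO, 0)); /* 0 */
    return 0;
}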
@@ -912,23 +914,25 @@ static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
* We must take tlb_c.lock to avoid racing with another vCPU update. The only
* thing actually updated is the target TLB entry ->addr_write flags.
*/
-void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
+void tlb_reset_dirty(CPUState *cpu, uintptr_t start, uintptr_t length)
{
int mmu_idx;
qemu_spin_lock(&cpu->neg.tlb.c.lock);
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
+ CPUTLBDesc *desc = &cpu->neg.tlb.d[mmu_idx];
+ CPUTLBDescFast *fast = cpu_tlb_fast(cpu, mmu_idx);
+ unsigned int n = tlb_n_entries(fast);
unsigned int i;
- unsigned int n = tlb_n_entries(&cpu->neg.tlb.f[mmu_idx]);
for (i = 0; i < n; i++) {
- tlb_reset_dirty_range_locked(&cpu->neg.tlb.f[mmu_idx].table[i],
- start1, length);
+ tlb_reset_dirty_range_locked(&desc->fulltlb[i], &fast->table[i],
+ start, length);
}
for (i = 0; i < CPU_VTLB_SIZE; i++) {
- tlb_reset_dirty_range_locked(&cpu->neg.tlb.d[mmu_idx].vtable[i],
- start1, length);
+ tlb_reset_dirty_range_locked(&desc->vfulltlb[i], &desc->vtable[i],
+ start, length);
}
}
qemu_spin_unlock(&cpu->neg.tlb.c.lock);
@@ -1079,7 +1083,7 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
if (prot & PAGE_WRITE) {
if (section->readonly) {
write_flags |= TLB_DISCARD_WRITE;
- } else if (cpu_physical_memory_is_clean(iotlb)) {
+ } else if (physical_memory_is_clean(iotlb)) {
write_flags |= TLB_NOTDIRTY;
}
}
@@ -1310,7 +1314,7 @@ static bool victim_tlb_hit(CPUState *cpu, size_t mmu_idx, size_t index,
if (cmp == page) {
/* Found entry in victim tlb, swap tlb and iotlb. */
- CPUTLBEntry tmptlb, *tlb = &cpu->neg.tlb.f[mmu_idx].table[index];
+ CPUTLBEntry tmptlb, *tlb = &cpu_tlb_fast(cpu, mmu_idx)->table[index];
qemu_spin_lock(&cpu->neg.tlb.c.lock);
copy_tlb_helper_locked(&tmptlb, tlb);
@@ -1335,18 +1339,18 @@ static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);
- if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
- tb_invalidate_phys_range_fast(ram_addr, size, retaddr);
+ if (!physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
+ tb_invalidate_phys_range_fast(cpu, ram_addr, size, retaddr);
}
/*
* Set both VGA and migration bits for simplicity and to remove
* the notdirty callback faster.
*/
- cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE);
+ physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE);
/* We remove the notdirty callback only if the code has been flushed. */
- if (!cpu_physical_memory_is_clean(ram_addr)) {
+ if (!physical_memory_is_clean(ram_addr)) {
trace_memory_notdirty_set_dirty(mem_vaddr);
tlb_set_dirty(cpu, mem_vaddr);
}
@@ -1738,6 +1742,7 @@ static bool mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
uintptr_t ra, MMUAccessType type, MMULookupLocals *l)
{
bool crosspage;
+ vaddr last;
int flags;
l->memop = get_memop(oi);
@@ -1747,13 +1752,15 @@ static bool mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
l->page[0].addr = addr;
l->page[0].size = memop_size(l->memop);
- l->page[1].addr = (addr + l->page[0].size - 1) & TARGET_PAGE_MASK;
+ l->page[1].addr = 0;
l->page[1].size = 0;
- crosspage = (addr ^ l->page[1].addr) & TARGET_PAGE_MASK;
- if (likely(!crosspage)) {
- mmu_lookup1(cpu, &l->page[0], l->memop, l->mmu_idx, type, ra);
+ /* Lookup and recognize exceptions from the first page. */
+ mmu_lookup1(cpu, &l->page[0], l->memop, l->mmu_idx, type, ra);
+ last = addr + l->page[0].size - 1;
+ crosspage = (addr ^ last) & TARGET_PAGE_MASK;
+ if (likely(!crosspage)) {
flags = l->page[0].flags;
if (unlikely(flags & (TLB_WATCHPOINT | TLB_NOTDIRTY))) {
mmu_watch_or_dirty(cpu, &l->page[0], type, ra);
@@ -1763,15 +1770,18 @@ static bool mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
}
} else {
/* Finish compute of page crossing. */
- int size0 = l->page[1].addr - addr;
+ vaddr addr1 = last & TARGET_PAGE_MASK;
+ int size0 = addr1 - addr;
l->page[1].size = l->page[0].size - size0;
l->page[0].size = size0;
+ l->page[1].addr = cpu->cc->tcg_ops->pointer_wrap(cpu, l->mmu_idx,
+ addr1, addr);
/*
- * Lookup both pages, recognizing exceptions from either. If the
- * second lookup potentially resized, refresh first CPUTLBEntryFull.
+ * Lookup and recognize exceptions from the second page.
+ * If the lookup potentially resized the table, refresh the
+ * first CPUTLBEntryFull pointer.
*/
- mmu_lookup1(cpu, &l->page[0], l->memop, l->mmu_idx, type, ra);
if (mmu_lookup1(cpu, &l->page[1], 0, l->mmu_idx, type, ra)) {
uintptr_t index = tlb_index(cpu, l->mmu_idx, addr);
l->page[0].full = &cpu->neg.tlb.d[l->mmu_idx].fulltlb[index];
@@ -1865,8 +1875,12 @@ static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
goto stop_the_world;
}
- /* Collect tlb flags for read. */
+ /* Finish collecting tlb flags for both read and write. */
+ full = &cpu->neg.tlb.d[mmu_idx].fulltlb[index];
tlb_addr |= tlbe->addr_read;
+ tlb_addr &= TLB_FLAGS_MASK & ~TLB_FORCE_SLOW;
+ tlb_addr |= full->slow_flags[MMU_DATA_STORE];
+ tlb_addr |= full->slow_flags[MMU_DATA_LOAD];
/* Notice an IO access or a needs-MMU-lookup access */
if (unlikely(tlb_addr & (TLB_MMIO | TLB_DISCARD_WRITE))) {
@@ -1876,13 +1890,12 @@ static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
}
hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
- full = &cpu->neg.tlb.d[mmu_idx].fulltlb[index];
if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
notdirty_write(cpu, addr, size, full, retaddr);
}
- if (unlikely(tlb_addr & TLB_FORCE_SLOW)) {
+ if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
int wp_flags = 0;
if (full->slow_flags[MMU_DATA_STORE] & TLB_WATCHPOINT) {
@@ -1891,10 +1904,8 @@ static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
if (full->slow_flags[MMU_DATA_LOAD] & TLB_WATCHPOINT) {
wp_flags |= BP_MEM_READ;
}
- if (wp_flags) {
- cpu_check_watchpoint(cpu, addr, size,
- full->attrs, wp_flags, retaddr);
- }
+ cpu_check_watchpoint(cpu, addr, size,
+ full->attrs, wp_flags, retaddr);
}
return hostaddr;
@@ -2321,7 +2332,7 @@ static uint8_t do_ld1_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
MMULookupLocals l;
bool crosspage;
- cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
+ cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
crosspage = mmu_lookup(cpu, addr, oi, ra, access_type, &l);
tcg_debug_assert(!crosspage);
@@ -2336,7 +2347,7 @@ static uint16_t do_ld2_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
uint16_t ret;
uint8_t a, b;
- cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
+ cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
crosspage = mmu_lookup(cpu, addr, oi, ra, access_type, &l);
if (likely(!crosspage)) {
return do_ld_2(cpu, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
@@ -2360,7 +2371,7 @@ static uint32_t do_ld4_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
bool crosspage;
uint32_t ret;
- cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
+ cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
crosspage = mmu_lookup(cpu, addr, oi, ra, access_type, &l);
if (likely(!crosspage)) {
return do_ld_4(cpu, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
@@ -2381,7 +2392,7 @@ static uint64_t do_ld8_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
bool crosspage;
uint64_t ret;
- cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
+ cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
crosspage = mmu_lookup(cpu, addr, oi, ra, access_type, &l);
if (likely(!crosspage)) {
return do_ld_8(cpu, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
@@ -2404,7 +2415,7 @@ static Int128 do_ld16_mmu(CPUState *cpu, vaddr addr,
Int128 ret;
int first;
- cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
+ cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
crosspage = mmu_lookup(cpu, addr, oi, ra, MMU_DATA_LOAD, &l);
if (likely(!crosspage)) {
if (unlikely(l.page[0].flags & TLB_MMIO)) {
@@ -2732,7 +2743,7 @@ static void do_st1_mmu(CPUState *cpu, vaddr addr, uint8_t val,
MMULookupLocals l;
bool crosspage;
- cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
+ cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
crosspage = mmu_lookup(cpu, addr, oi, ra, MMU_DATA_STORE, &l);
tcg_debug_assert(!crosspage);
@@ -2746,7 +2757,7 @@ static void do_st2_mmu(CPUState *cpu, vaddr addr, uint16_t val,
bool crosspage;
uint8_t a, b;
- cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
+ cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
crosspage = mmu_lookup(cpu, addr, oi, ra, MMU_DATA_STORE, &l);
if (likely(!crosspage)) {
do_st_2(cpu, &l.page[0], val, l.mmu_idx, l.memop, ra);
@@ -2768,7 +2779,7 @@ static void do_st4_mmu(CPUState *cpu, vaddr addr, uint32_t val,
MMULookupLocals l;
bool crosspage;
- cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
+ cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
crosspage = mmu_lookup(cpu, addr, oi, ra, MMU_DATA_STORE, &l);
if (likely(!crosspage)) {
do_st_4(cpu, &l.page[0], val, l.mmu_idx, l.memop, ra);
@@ -2789,7 +2800,7 @@ static void do_st8_mmu(CPUState *cpu, vaddr addr, uint64_t val,
MMULookupLocals l;
bool crosspage;
- cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
+ cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
crosspage = mmu_lookup(cpu, addr, oi, ra, MMU_DATA_STORE, &l);
if (likely(!crosspage)) {
do_st_8(cpu, &l.page[0], val, l.mmu_idx, l.memop, ra);
@@ -2812,7 +2823,7 @@ static void do_st16_mmu(CPUState *cpu, vaddr addr, Int128 val,
uint64_t a, b;
int first;
- cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
+ cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
crosspage = mmu_lookup(cpu, addr, oi, ra, MMU_DATA_STORE, &l);
if (likely(!crosspage)) {
if (unlikely(l.page[0].flags & TLB_MMIO)) {
@@ -2897,54 +2908,45 @@ static void do_st16_mmu(CPUState *cpu, vaddr addr, Int128 val,
/* Code access functions. */
-uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
-{
- CPUState *cs = env_cpu(env);
- MemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(cs, true));
- return do_ld1_mmu(cs, addr, oi, 0, MMU_INST_FETCH);
-}
-
-uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
-{
- CPUState *cs = env_cpu(env);
- MemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(cs, true));
- return do_ld2_mmu(cs, addr, oi, 0, MMU_INST_FETCH);
-}
-
-uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
-{
- CPUState *cs = env_cpu(env);
- MemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(cs, true));
- return do_ld4_mmu(cs, addr, oi, 0, MMU_INST_FETCH);
-}
-
-uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
-{
- CPUState *cs = env_cpu(env);
- MemOpIdx oi = make_memop_idx(MO_TEUQ, cpu_mmu_index(cs, true));
- return do_ld8_mmu(cs, addr, oi, 0, MMU_INST_FETCH);
-}
-
-uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
+uint8_t cpu_ldb_code_mmu(CPUArchState *env, vaddr addr,
MemOpIdx oi, uintptr_t retaddr)
{
return do_ld1_mmu(env_cpu(env), addr, oi, retaddr, MMU_INST_FETCH);
}
-uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
+uint16_t cpu_ldw_code_mmu(CPUArchState *env, vaddr addr,
MemOpIdx oi, uintptr_t retaddr)
{
return do_ld2_mmu(env_cpu(env), addr, oi, retaddr, MMU_INST_FETCH);
}
-uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
+uint32_t cpu_ldl_code_mmu(CPUArchState *env, vaddr addr,
MemOpIdx oi, uintptr_t retaddr)
{
return do_ld4_mmu(env_cpu(env), addr, oi, retaddr, MMU_INST_FETCH);
}
-uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
+uint64_t cpu_ldq_code_mmu(CPUArchState *env, vaddr addr,
MemOpIdx oi, uintptr_t retaddr)
{
return do_ld8_mmu(env_cpu(env), addr, oi, retaddr, MMU_INST_FETCH);
}
+
+/*
+ * Common pointer_wrap implementations.
+ */
+
+/*
+ * To be used for strict alignment targets.
+ * Because no accesses are unaligned, no accesses wrap either.
+ */
+vaddr cpu_pointer_wrap_notreached(CPUState *cs, int idx, vaddr res, vaddr base)
+{
+ g_assert_not_reached();
+}
+
+/* To be used for strict 32-bit targets. */
+vaddr cpu_pointer_wrap_uint32(CPUState *cs, int idx, vaddr res, vaddr base)
+{
+ return (uint32_t)res;
+}
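What the new hook buys: when a 32-bit guest's access crosses the top of its address space, the second page must be looked up at the truncated address rather than at 2^32. A standalone illustration of the truncation cpu_pointer_wrap_uint32 performs (types simplified; the real hook also receives CPUState and mmu_idx):

#include <stdint.h>
#include <stdio.h>

typedef uint64_t vaddr_t;   /* stand-in for QEMU's vaddr */

static vaddr_t wrap_uint32(vaddr_t res)
{
    return (uint32_t)res;   /* mirrors cpu_pointer_wrap_uint32 */
}

int main(void)
{
    vaddr_t addr = 0xfffffffeULL;             /* 4-byte access at top of 4G */
    vaddr_t page1 = (addr + 3) & ~0xfffULL;   /* naive second-page base */

    printf("unwrapped: %#llx wrapped: %#llx\n",
           (unsigned long long)page1,
           (unsigned long long)wrap_uint32(page1));  /* 0x100000000 -> 0 */
    return 0;
}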
diff --git a/accel/tcg/icount-common.c b/accel/tcg/icount-common.c
index 402d3e3..d647117 100644
--- a/accel/tcg/icount-common.c
+++ b/accel/tcg/icount-common.c
@@ -35,7 +35,7 @@
#include "system/replay.h"
#include "system/runstate.h"
#include "hw/core/cpu.h"
-#include "system/cpu-timers.h"
+#include "exec/icount.h"
#include "system/cpu-timers-internal.h"
/*
diff --git a/accel/tcg/internal-common.h b/accel/tcg/internal-common.h
index 9b6ab3a..6adfeef 100644
--- a/accel/tcg/internal-common.h
+++ b/accel/tcg/internal-common.h
@@ -11,6 +11,8 @@
#include "exec/cpu-common.h"
#include "exec/translation-block.h"
+#include "exec/mmap-lock.h"
+#include "accel/tcg/tb-cpu-state.h"
extern int64_t max_delay;
extern int64_t max_advance;
@@ -45,9 +47,7 @@ static inline bool cpu_plugin_mem_cbs_enabled(const CPUState *cpu)
#endif
}
-TranslationBlock *tb_gen_code(CPUState *cpu, vaddr pc,
- uint64_t cs_base, uint32_t flags,
- int cflags);
+TranslationBlock *tb_gen_code(CPUState *cpu, TCGTBCPUState s);
void page_init(void);
void tb_htable_init(void);
void tb_reset_jump(TranslationBlock *tb, int n);
@@ -74,4 +74,71 @@ uint32_t curr_cflags(CPUState *cpu);
void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr);
+/**
+ * get_page_addr_code_hostp()
+ * @env: CPUArchState
+ * @addr: guest virtual address of guest code
+ * @hostp: if non-NULL, output for the host address of @addr's contents
+ *
+ * See get_page_addr_code() (full-system version) for documentation on the
+ * return value.
+ *
+ * Sets *@hostp (when @hostp is non-NULL) as follows.
+ * If the return value is -1, sets *@hostp to NULL. Otherwise, sets *@hostp
+ * to the host address where @addr's content is kept.
+ *
+ * Note: this function can trigger an exception.
+ */
+tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr,
+ void **hostp);
+
+/**
+ * get_page_addr_code()
+ * @env: CPUArchState
+ * @addr: guest virtual address of guest code
+ *
+ * If we cannot translate and execute from the entire RAM page, or if
+ * the region is not backed by RAM, returns -1. Otherwise, returns the
+ * ram_addr_t corresponding to the guest code at @addr.
+ *
+ * Note: this function can trigger an exception.
+ */
+static inline tb_page_addr_t get_page_addr_code(CPUArchState *env,
+ vaddr addr)
+{
+ return get_page_addr_code_hostp(env, addr, NULL);
+}
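A hypothetical caller, to make the documented contract concrete; this fragment assumes a QEMU build context and is not standalone:

/* -1 means the code is not in directly executable RAM (e.g. MMIO),
 * in which case *hostp has been set to NULL and no host pointer may
 * be cached. */
static bool code_in_ram(CPUArchState *env, vaddr pc)
{
    void *host;
    tb_page_addr_t phys = get_page_addr_code_hostp(env, pc, &host);

    if (phys == (tb_page_addr_t)-1) {
        return false;   /* host == NULL; translate without direct access */
    }
    return true;        /* host points at pc's bytes */
}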
+
+/*
+ * Access to the various translation structures needs to be serialised
+ * via locks for consistency. In user-mode emulation, access to the
+ * memory-related structures is protected by mmap_lock.
+ * In !user-mode we use per-page locks.
+ */
+#ifdef CONFIG_USER_ONLY
+#define assert_memory_lock() tcg_debug_assert(have_mmap_lock())
+#else
+#define assert_memory_lock()
+#endif
+
+#if defined(CONFIG_SOFTMMU) && defined(CONFIG_DEBUG_TCG)
+void assert_no_pages_locked(void);
+#else
+static inline void assert_no_pages_locked(void) { }
+#endif
+
+#ifdef CONFIG_USER_ONLY
+static inline void page_table_config_init(void) { }
+#else
+void page_table_config_init(void);
+#endif
+
+#ifndef CONFIG_USER_ONLY
+G_NORETURN void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr);
+#endif /* CONFIG_USER_ONLY */
+
+void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
+void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr);
+
+void tcg_get_stats(AccelState *accel, GString *buf);
+
#endif
diff --git a/accel/tcg/internal-target.h b/accel/tcg/internal-target.h
deleted file mode 100644
index 2cdf11c..0000000
--- a/accel/tcg/internal-target.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Internal execution defines for qemu (target specific)
- *
- * Copyright (c) 2003 Fabrice Bellard
- *
- * SPDX-License-Identifier: LGPL-2.1-or-later
- */
-
-#ifndef ACCEL_TCG_INTERNAL_TARGET_H
-#define ACCEL_TCG_INTERNAL_TARGET_H
-
-#include "exec/exec-all.h"
-#include "exec/translation-block.h"
-#include "tb-internal.h"
-#include "tcg-target-mo.h"
-
-/*
- * Access to the various translations structures need to be serialised
- * via locks for consistency. In user-mode emulation access to the
- * memory related structures are protected with mmap_lock.
- * In !user-mode we use per-page locks.
- */
-#ifdef CONFIG_USER_ONLY
-#define assert_memory_lock() tcg_debug_assert(have_mmap_lock())
-#else
-#define assert_memory_lock()
-#endif
-
-#if defined(CONFIG_SOFTMMU) && defined(CONFIG_DEBUG_TCG)
-void assert_no_pages_locked(void);
-#else
-static inline void assert_no_pages_locked(void) { }
-#endif
-
-#ifdef CONFIG_USER_ONLY
-static inline void page_table_config_init(void) { }
-#else
-void page_table_config_init(void);
-#endif
-
-#ifndef CONFIG_USER_ONLY
-G_NORETURN void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr);
-#endif /* CONFIG_USER_ONLY */
-
-/**
- * tcg_req_mo:
- * @type: TCGBar
- *
- * Filter @type to the barrier that is required for the guest
- * memory ordering vs the host memory ordering. A non-zero
- * result indicates that some barrier is required.
- *
- * If TCG_GUEST_DEFAULT_MO is not defined, assume that the
- * guest requires strict ordering.
- *
- * This is a macro so that it's constant even without optimization.
- */
-#ifdef TCG_GUEST_DEFAULT_MO
-# define tcg_req_mo(type) \
- ((type) & TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO)
-#else
-# define tcg_req_mo(type) ((type) & ~TCG_TARGET_DEFAULT_MO)
-#endif
-
-/**
- * cpu_req_mo:
- * @type: TCGBar
- *
- * If tcg_req_mo indicates a barrier for @type is required
- * for the guest memory model, issue a host memory barrier.
- */
-#define cpu_req_mo(type) \
- do { \
- if (tcg_req_mo(type)) { \
- smp_mb(); \
- } \
- } while (0)
-
-#endif /* ACCEL_TCG_INTERNAL_H */
diff --git a/accel/tcg/ldst_common.c.inc b/accel/tcg/ldst_common.c.inc
index ebbf380..57f3e06 100644
--- a/accel/tcg/ldst_common.c.inc
+++ b/accel/tcg/ldst_common.c.inc
@@ -123,7 +123,7 @@ void helper_st_i128(CPUArchState *env, uint64_t addr, Int128 val, MemOpIdx oi)
* Load helpers for cpu_ldst.h
*/
-static void plugin_load_cb(CPUArchState *env, abi_ptr addr,
+static void plugin_load_cb(CPUArchState *env, vaddr addr,
uint64_t value_low,
uint64_t value_high,
MemOpIdx oi)
@@ -135,7 +135,7 @@ static void plugin_load_cb(CPUArchState *env, abi_ptr addr,
}
}
-uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra)
+uint8_t cpu_ldb_mmu(CPUArchState *env, vaddr addr, MemOpIdx oi, uintptr_t ra)
{
uint8_t ret;
@@ -145,7 +145,7 @@ uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra)
return ret;
}
-uint16_t cpu_ldw_mmu(CPUArchState *env, abi_ptr addr,
+uint16_t cpu_ldw_mmu(CPUArchState *env, vaddr addr,
MemOpIdx oi, uintptr_t ra)
{
uint16_t ret;
@@ -156,7 +156,7 @@ uint16_t cpu_ldw_mmu(CPUArchState *env, abi_ptr addr,
return ret;
}
-uint32_t cpu_ldl_mmu(CPUArchState *env, abi_ptr addr,
+uint32_t cpu_ldl_mmu(CPUArchState *env, vaddr addr,
MemOpIdx oi, uintptr_t ra)
{
uint32_t ret;
@@ -167,7 +167,7 @@ uint32_t cpu_ldl_mmu(CPUArchState *env, abi_ptr addr,
return ret;
}
-uint64_t cpu_ldq_mmu(CPUArchState *env, abi_ptr addr,
+uint64_t cpu_ldq_mmu(CPUArchState *env, vaddr addr,
MemOpIdx oi, uintptr_t ra)
{
uint64_t ret;
@@ -178,7 +178,7 @@ uint64_t cpu_ldq_mmu(CPUArchState *env, abi_ptr addr,
return ret;
}
-Int128 cpu_ld16_mmu(CPUArchState *env, abi_ptr addr,
+Int128 cpu_ld16_mmu(CPUArchState *env, vaddr addr,
MemOpIdx oi, uintptr_t ra)
{
Int128 ret;
@@ -193,7 +193,7 @@ Int128 cpu_ld16_mmu(CPUArchState *env, abi_ptr addr,
* Store helpers for cpu_ldst.h
*/
-static void plugin_store_cb(CPUArchState *env, abi_ptr addr,
+static void plugin_store_cb(CPUArchState *env, vaddr addr,
uint64_t value_low,
uint64_t value_high,
MemOpIdx oi)
@@ -205,14 +205,14 @@ static void plugin_store_cb(CPUArchState *env, abi_ptr addr,
}
}
-void cpu_stb_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
+void cpu_stb_mmu(CPUArchState *env, vaddr addr, uint8_t val,
MemOpIdx oi, uintptr_t retaddr)
{
helper_stb_mmu(env, addr, val, oi, retaddr);
plugin_store_cb(env, addr, val, 0, oi);
}
-void cpu_stw_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
+void cpu_stw_mmu(CPUArchState *env, vaddr addr, uint16_t val,
MemOpIdx oi, uintptr_t retaddr)
{
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_16);
@@ -220,7 +220,7 @@ void cpu_stw_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
plugin_store_cb(env, addr, val, 0, oi);
}
-void cpu_stl_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
+void cpu_stl_mmu(CPUArchState *env, vaddr addr, uint32_t val,
MemOpIdx oi, uintptr_t retaddr)
{
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_32);
@@ -228,7 +228,7 @@ void cpu_stl_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
plugin_store_cb(env, addr, val, 0, oi);
}
-void cpu_stq_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
+void cpu_stq_mmu(CPUArchState *env, vaddr addr, uint64_t val,
MemOpIdx oi, uintptr_t retaddr)
{
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_64);
@@ -236,325 +236,10 @@ void cpu_stq_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
plugin_store_cb(env, addr, val, 0, oi);
}
-void cpu_st16_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
+void cpu_st16_mmu(CPUArchState *env, vaddr addr, Int128 val,
MemOpIdx oi, uintptr_t retaddr)
{
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128);
do_st16_mmu(env_cpu(env), addr, val, oi, retaddr);
plugin_store_cb(env, addr, int128_getlo(val), int128_gethi(val), oi);
}
-
-/*
- * Wrappers of the above
- */
-
-uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr,
- int mmu_idx, uintptr_t ra)
-{
- MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
- return cpu_ldb_mmu(env, addr, oi, ra);
-}
-
-int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr,
- int mmu_idx, uintptr_t ra)
-{
- return (int8_t)cpu_ldub_mmuidx_ra(env, addr, mmu_idx, ra);
-}
-
-uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
- int mmu_idx, uintptr_t ra)
-{
- MemOpIdx oi = make_memop_idx(MO_BEUW | MO_UNALN, mmu_idx);
- return cpu_ldw_mmu(env, addr, oi, ra);
-}
-
-int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
- int mmu_idx, uintptr_t ra)
-{
- return (int16_t)cpu_lduw_be_mmuidx_ra(env, addr, mmu_idx, ra);
-}
-
-uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
- int mmu_idx, uintptr_t ra)
-{
- MemOpIdx oi = make_memop_idx(MO_BEUL | MO_UNALN, mmu_idx);
- return cpu_ldl_mmu(env, addr, oi, ra);
-}
-
-uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
- int mmu_idx, uintptr_t ra)
-{
- MemOpIdx oi = make_memop_idx(MO_BEUQ | MO_UNALN, mmu_idx);
- return cpu_ldq_mmu(env, addr, oi, ra);
-}
-
-uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
- int mmu_idx, uintptr_t ra)
-{
- MemOpIdx oi = make_memop_idx(MO_LEUW | MO_UNALN, mmu_idx);
- return cpu_ldw_mmu(env, addr, oi, ra);
-}
-
-int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
- int mmu_idx, uintptr_t ra)
-{
- return (int16_t)cpu_lduw_le_mmuidx_ra(env, addr, mmu_idx, ra);
-}
-
-uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
- int mmu_idx, uintptr_t ra)
-{
- MemOpIdx oi = make_memop_idx(MO_LEUL | MO_UNALN, mmu_idx);
- return cpu_ldl_mmu(env, addr, oi, ra);
-}
-
-uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
- int mmu_idx, uintptr_t ra)
-{
- MemOpIdx oi = make_memop_idx(MO_LEUQ | MO_UNALN, mmu_idx);
- return cpu_ldq_mmu(env, addr, oi, ra);
-}
-
-void cpu_stb_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
- int mmu_idx, uintptr_t ra)
-{
- MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
- cpu_stb_mmu(env, addr, val, oi, ra);
-}
-
-void cpu_stw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
- int mmu_idx, uintptr_t ra)
-{
- MemOpIdx oi = make_memop_idx(MO_BEUW | MO_UNALN, mmu_idx);
- cpu_stw_mmu(env, addr, val, oi, ra);
-}
-
-void cpu_stl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
- int mmu_idx, uintptr_t ra)
-{
- MemOpIdx oi = make_memop_idx(MO_BEUL | MO_UNALN, mmu_idx);
- cpu_stl_mmu(env, addr, val, oi, ra);
-}
-
-void cpu_stq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val,
- int mmu_idx, uintptr_t ra)
-{
- MemOpIdx oi = make_memop_idx(MO_BEUQ | MO_UNALN, mmu_idx);
- cpu_stq_mmu(env, addr, val, oi, ra);
-}
-
-void cpu_stw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
- int mmu_idx, uintptr_t ra)
-{
- MemOpIdx oi = make_memop_idx(MO_LEUW | MO_UNALN, mmu_idx);
- cpu_stw_mmu(env, addr, val, oi, ra);
-}
-
-void cpu_stl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
- int mmu_idx, uintptr_t ra)
-{
- MemOpIdx oi = make_memop_idx(MO_LEUL | MO_UNALN, mmu_idx);
- cpu_stl_mmu(env, addr, val, oi, ra);
-}
-
-void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val,
- int mmu_idx, uintptr_t ra)
-{
- MemOpIdx oi = make_memop_idx(MO_LEUQ | MO_UNALN, mmu_idx);
- cpu_stq_mmu(env, addr, val, oi, ra);
-}
-
-/*--------------------------*/
-
-uint32_t cpu_ldub_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
-{
- int mmu_index = cpu_mmu_index(env_cpu(env), false);
- return cpu_ldub_mmuidx_ra(env, addr, mmu_index, ra);
-}
-
-int cpu_ldsb_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
-{
- return (int8_t)cpu_ldub_data_ra(env, addr, ra);
-}
-
-uint32_t cpu_lduw_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
-{
- int mmu_index = cpu_mmu_index(env_cpu(env), false);
- return cpu_lduw_be_mmuidx_ra(env, addr, mmu_index, ra);
-}
-
-int cpu_ldsw_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
-{
- return (int16_t)cpu_lduw_be_data_ra(env, addr, ra);
-}
-
-uint32_t cpu_ldl_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
-{
- int mmu_index = cpu_mmu_index(env_cpu(env), false);
- return cpu_ldl_be_mmuidx_ra(env, addr, mmu_index, ra);
-}
-
-uint64_t cpu_ldq_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
-{
- int mmu_index = cpu_mmu_index(env_cpu(env), false);
- return cpu_ldq_be_mmuidx_ra(env, addr, mmu_index, ra);
-}
-
-uint32_t cpu_lduw_le_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
-{
- int mmu_index = cpu_mmu_index(env_cpu(env), false);
- return cpu_lduw_le_mmuidx_ra(env, addr, mmu_index, ra);
-}
-
-int cpu_ldsw_le_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
-{
- return (int16_t)cpu_lduw_le_data_ra(env, addr, ra);
-}
-
-uint32_t cpu_ldl_le_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
-{
- int mmu_index = cpu_mmu_index(env_cpu(env), false);
- return cpu_ldl_le_mmuidx_ra(env, addr, mmu_index, ra);
-}
-
-uint64_t cpu_ldq_le_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
-{
- int mmu_index = cpu_mmu_index(env_cpu(env), false);
- return cpu_ldq_le_mmuidx_ra(env, addr, mmu_index, ra);
-}
-
-void cpu_stb_data_ra(CPUArchState *env, abi_ptr addr,
- uint32_t val, uintptr_t ra)
-{
- int mmu_index = cpu_mmu_index(env_cpu(env), false);
- cpu_stb_mmuidx_ra(env, addr, val, mmu_index, ra);
-}
-
-void cpu_stw_be_data_ra(CPUArchState *env, abi_ptr addr,
- uint32_t val, uintptr_t ra)
-{
- int mmu_index = cpu_mmu_index(env_cpu(env), false);
- cpu_stw_be_mmuidx_ra(env, addr, val, mmu_index, ra);
-}
-
-void cpu_stl_be_data_ra(CPUArchState *env, abi_ptr addr,
- uint32_t val, uintptr_t ra)
-{
- int mmu_index = cpu_mmu_index(env_cpu(env), false);
- cpu_stl_be_mmuidx_ra(env, addr, val, mmu_index, ra);
-}
-
-void cpu_stq_be_data_ra(CPUArchState *env, abi_ptr addr,
- uint64_t val, uintptr_t ra)
-{
- int mmu_index = cpu_mmu_index(env_cpu(env), false);
- cpu_stq_be_mmuidx_ra(env, addr, val, mmu_index, ra);
-}
-
-void cpu_stw_le_data_ra(CPUArchState *env, abi_ptr addr,
- uint32_t val, uintptr_t ra)
-{
- int mmu_index = cpu_mmu_index(env_cpu(env), false);
- cpu_stw_le_mmuidx_ra(env, addr, val, mmu_index, ra);
-}
-
-void cpu_stl_le_data_ra(CPUArchState *env, abi_ptr addr,
- uint32_t val, uintptr_t ra)
-{
- int mmu_index = cpu_mmu_index(env_cpu(env), false);
- cpu_stl_le_mmuidx_ra(env, addr, val, mmu_index, ra);
-}
-
-void cpu_stq_le_data_ra(CPUArchState *env, abi_ptr addr,
- uint64_t val, uintptr_t ra)
-{
- int mmu_index = cpu_mmu_index(env_cpu(env), false);
- cpu_stq_le_mmuidx_ra(env, addr, val, mmu_index, ra);
-}
-
-/*--------------------------*/
-
-uint32_t cpu_ldub_data(CPUArchState *env, abi_ptr addr)
-{
- return cpu_ldub_data_ra(env, addr, 0);
-}
-
-int cpu_ldsb_data(CPUArchState *env, abi_ptr addr)
-{
- return (int8_t)cpu_ldub_data(env, addr);
-}
-
-uint32_t cpu_lduw_be_data(CPUArchState *env, abi_ptr addr)
-{
- return cpu_lduw_be_data_ra(env, addr, 0);
-}
-
-int cpu_ldsw_be_data(CPUArchState *env, abi_ptr addr)
-{
- return (int16_t)cpu_lduw_be_data(env, addr);
-}
-
-uint32_t cpu_ldl_be_data(CPUArchState *env, abi_ptr addr)
-{
- return cpu_ldl_be_data_ra(env, addr, 0);
-}
-
-uint64_t cpu_ldq_be_data(CPUArchState *env, abi_ptr addr)
-{
- return cpu_ldq_be_data_ra(env, addr, 0);
-}
-
-uint32_t cpu_lduw_le_data(CPUArchState *env, abi_ptr addr)
-{
- return cpu_lduw_le_data_ra(env, addr, 0);
-}
-
-int cpu_ldsw_le_data(CPUArchState *env, abi_ptr addr)
-{
- return (int16_t)cpu_lduw_le_data(env, addr);
-}
-
-uint32_t cpu_ldl_le_data(CPUArchState *env, abi_ptr addr)
-{
- return cpu_ldl_le_data_ra(env, addr, 0);
-}
-
-uint64_t cpu_ldq_le_data(CPUArchState *env, abi_ptr addr)
-{
- return cpu_ldq_le_data_ra(env, addr, 0);
-}
-
-void cpu_stb_data(CPUArchState *env, abi_ptr addr, uint32_t val)
-{
- cpu_stb_data_ra(env, addr, val, 0);
-}
-
-void cpu_stw_be_data(CPUArchState *env, abi_ptr addr, uint32_t val)
-{
- cpu_stw_be_data_ra(env, addr, val, 0);
-}
-
-void cpu_stl_be_data(CPUArchState *env, abi_ptr addr, uint32_t val)
-{
- cpu_stl_be_data_ra(env, addr, val, 0);
-}
-
-void cpu_stq_be_data(CPUArchState *env, abi_ptr addr, uint64_t val)
-{
- cpu_stq_be_data_ra(env, addr, val, 0);
-}
-
-void cpu_stw_le_data(CPUArchState *env, abi_ptr addr, uint32_t val)
-{
- cpu_stw_le_data_ra(env, addr, val, 0);
-}
-
-void cpu_stl_le_data(CPUArchState *env, abi_ptr addr, uint32_t val)
-{
- cpu_stl_le_data_ra(env, addr, val, 0);
-}
-
-void cpu_stq_le_data(CPUArchState *env, abi_ptr addr, uint64_t val)
-{
- cpu_stq_le_data_ra(env, addr, val, 0);
-}
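Note on the deletions above: every removed cpu_ld*/cpu_st* wrapper bottoms out in the MemOpIdx-based cpu_*_mmu() entry points, which this patch keeps. A minimal sketch, assuming only the API visible in the diff, of what a former cpu_ldl_le_data() call reduces to (the helper name here is illustrative):

/* Illustrative replacement for the removed cpu_ldl_le_data() wrapper;
 * load_u32_le() is a hypothetical name, the rest is the API kept by
 * this patch. */
static uint32_t load_u32_le(CPUArchState *env, abi_ptr addr)
{
    int mmu_index = cpu_mmu_index(env_cpu(env), false);
    MemOpIdx oi = make_memop_idx(MO_LEUL | MO_UNALN, mmu_index);

    /* ra == 0: not called from generated code, no TB unwinding. */
    return cpu_ldl_mmu(env, addr, oi, 0);
}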
diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build
index 38ff227..002aa8f 100644
--- a/accel/tcg/meson.build
+++ b/accel/tcg/meson.build
@@ -1,28 +1,34 @@
-common_ss.add(when: 'CONFIG_TCG', if_true: files(
+if not have_tcg
+ subdir_done()
+endif
+
+tcg_ss = ss.source_set()
+
+tcg_ss.add(files(
+ 'cpu-exec.c',
'cpu-exec-common.c',
'tcg-runtime.c',
'tcg-runtime-gvec.c',
-))
-tcg_specific_ss = ss.source_set()
-tcg_specific_ss.add(files(
- 'tcg-all.c',
- 'cpu-exec.c',
'tb-maint.c',
+ 'tcg-all.c',
+ 'tcg-stats.c',
'translate-all.c',
'translator.c',
))
-tcg_specific_ss.add(when: 'CONFIG_USER_ONLY', if_true: files('user-exec.c'))
-tcg_specific_ss.add(when: 'CONFIG_SYSTEM_ONLY', if_false: files('user-exec-stub.c'))
if get_option('plugins')
- tcg_specific_ss.add(files('plugin-gen.c'))
+ tcg_ss.add(files('plugin-gen.c'))
endif
-specific_ss.add_all(when: 'CONFIG_TCG', if_true: tcg_specific_ss)
-specific_ss.add(when: ['CONFIG_SYSTEM_ONLY', 'CONFIG_TCG'], if_true: files(
- 'cputlb.c',
+user_ss.add_all(tcg_ss)
+system_ss.add_all(tcg_ss)
+
+user_ss.add(files(
+ 'user-exec.c',
+ 'user-exec-stub.c',
))
-system_ss.add(when: ['CONFIG_TCG'], if_true: files(
+system_ss.add(files(
+ 'cputlb.c',
'icount-common.c',
'monitor.c',
'tcg-accel-ops.c',
diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
index eeb38a4..be5c195 100644
--- a/accel/tcg/monitor.c
+++ b/accel/tcg/monitor.c
@@ -7,196 +7,13 @@
*/
#include "qemu/osdep.h"
-#include "qemu/accel.h"
-#include "qemu/qht.h"
#include "qapi/error.h"
#include "qapi/type-helpers.h"
#include "qapi/qapi-commands-machine.h"
#include "monitor/monitor.h"
-#include "system/cpu-timers.h"
#include "system/tcg.h"
#include "tcg/tcg.h"
#include "internal-common.h"
-#include "tb-context.h"
-
-
-static void dump_drift_info(GString *buf)
-{
- if (!icount_enabled()) {
- return;
- }
-
- g_string_append_printf(buf, "Host - Guest clock %"PRIi64" ms\n",
- (cpu_get_clock() - icount_get()) / SCALE_MS);
- if (icount_align_option) {
- g_string_append_printf(buf, "Max guest delay %"PRIi64" ms\n",
- -max_delay / SCALE_MS);
- g_string_append_printf(buf, "Max guest advance %"PRIi64" ms\n",
- max_advance / SCALE_MS);
- } else {
- g_string_append_printf(buf, "Max guest delay NA\n");
- g_string_append_printf(buf, "Max guest advance NA\n");
- }
-}
-
-static void dump_accel_info(GString *buf)
-{
- AccelState *accel = current_accel();
- bool one_insn_per_tb = object_property_get_bool(OBJECT(accel),
- "one-insn-per-tb",
- &error_fatal);
-
- g_string_append_printf(buf, "Accelerator settings:\n");
- g_string_append_printf(buf, "one-insn-per-tb: %s\n\n",
- one_insn_per_tb ? "on" : "off");
-}
-
-static void print_qht_statistics(struct qht_stats hst, GString *buf)
-{
- uint32_t hgram_opts;
- size_t hgram_bins;
- char *hgram;
-
- if (!hst.head_buckets) {
- return;
- }
- g_string_append_printf(buf, "TB hash buckets %zu/%zu "
- "(%0.2f%% head buckets used)\n",
- hst.used_head_buckets, hst.head_buckets,
- (double)hst.used_head_buckets /
- hst.head_buckets * 100);
-
- hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
- hgram_opts |= QDIST_PR_100X | QDIST_PR_PERCENT;
- if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
- hgram_opts |= QDIST_PR_NODECIMAL;
- }
- hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
- g_string_append_printf(buf, "TB hash occupancy %0.2f%% avg chain occ. "
- "Histogram: %s\n",
- qdist_avg(&hst.occupancy) * 100, hgram);
- g_free(hgram);
-
- hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
- hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
- if (hgram_bins > 10) {
- hgram_bins = 10;
- } else {
- hgram_bins = 0;
- hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
- }
- hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
- g_string_append_printf(buf, "TB hash avg chain %0.3f buckets. "
- "Histogram: %s\n",
- qdist_avg(&hst.chain), hgram);
- g_free(hgram);
-}
-
-struct tb_tree_stats {
- size_t nb_tbs;
- size_t host_size;
- size_t target_size;
- size_t max_target_size;
- size_t direct_jmp_count;
- size_t direct_jmp2_count;
- size_t cross_page;
-};
-
-static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
-{
- const TranslationBlock *tb = value;
- struct tb_tree_stats *tst = data;
-
- tst->nb_tbs++;
- tst->host_size += tb->tc.size;
- tst->target_size += tb->size;
- if (tb->size > tst->max_target_size) {
- tst->max_target_size = tb->size;
- }
- if (tb->page_addr[1] != -1) {
- tst->cross_page++;
- }
- if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
- tst->direct_jmp_count++;
- if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
- tst->direct_jmp2_count++;
- }
- }
- return false;
-}
-
-static void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
-{
- CPUState *cpu;
- size_t full = 0, part = 0, elide = 0;
-
- CPU_FOREACH(cpu) {
- full += qatomic_read(&cpu->neg.tlb.c.full_flush_count);
- part += qatomic_read(&cpu->neg.tlb.c.part_flush_count);
- elide += qatomic_read(&cpu->neg.tlb.c.elide_flush_count);
- }
- *pfull = full;
- *ppart = part;
- *pelide = elide;
-}
-
-static void tcg_dump_info(GString *buf)
-{
- g_string_append_printf(buf, "[TCG profiler not compiled]\n");
-}
-
-static void dump_exec_info(GString *buf)
-{
- struct tb_tree_stats tst = {};
- struct qht_stats hst;
- size_t nb_tbs, flush_full, flush_part, flush_elide;
-
- tcg_tb_foreach(tb_tree_stats_iter, &tst);
- nb_tbs = tst.nb_tbs;
- /* XXX: avoid using doubles ? */
- g_string_append_printf(buf, "Translation buffer state:\n");
- /*
- * Report total code size including the padding and TB structs;
- * otherwise users might think "-accel tcg,tb-size" is not honoured.
- * For avg host size we use the precise numbers from tb_tree_stats though.
- */
- g_string_append_printf(buf, "gen code size %zu/%zu\n",
- tcg_code_size(), tcg_code_capacity());
- g_string_append_printf(buf, "TB count %zu\n", nb_tbs);
- g_string_append_printf(buf, "TB avg target size %zu max=%zu bytes\n",
- nb_tbs ? tst.target_size / nb_tbs : 0,
- tst.max_target_size);
- g_string_append_printf(buf, "TB avg host size %zu bytes "
- "(expansion ratio: %0.1f)\n",
- nb_tbs ? tst.host_size / nb_tbs : 0,
- tst.target_size ?
- (double)tst.host_size / tst.target_size : 0);
- g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
- tst.cross_page,
- nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
- g_string_append_printf(buf, "direct jump count %zu (%zu%%) "
- "(2 jumps=%zu %zu%%)\n",
- tst.direct_jmp_count,
- nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
- tst.direct_jmp2_count,
- nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
-
- qht_statistics_init(&tb_ctx.htable, &hst);
- print_qht_statistics(hst, buf);
- qht_statistics_destroy(&hst);
-
- g_string_append_printf(buf, "\nStatistics:\n");
- g_string_append_printf(buf, "TB flush count %u\n",
- qatomic_read(&tb_ctx.tb_flush_count));
- g_string_append_printf(buf, "TB invalidate count %u\n",
- qatomic_read(&tb_ctx.tb_phys_invalidate_count));
-
- tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
- g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full);
- g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
- g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide);
- tcg_dump_info(buf);
-}
HumanReadableText *qmp_x_query_jit(Error **errp)
{
@@ -207,29 +24,7 @@ HumanReadableText *qmp_x_query_jit(Error **errp)
return NULL;
}
- dump_accel_info(buf);
- dump_exec_info(buf);
- dump_drift_info(buf);
-
- return human_readable_text_from_str(buf);
-}
-
-static void tcg_dump_op_count(GString *buf)
-{
- g_string_append_printf(buf, "[TCG profiler not compiled]\n");
-}
-
-HumanReadableText *qmp_x_query_opcount(Error **errp)
-{
- g_autoptr(GString) buf = g_string_new("");
-
- if (!tcg_enabled()) {
- error_setg(errp,
- "Opcode count information is only available with accel=tcg");
- return NULL;
- }
-
- tcg_dump_op_count(buf);
+ tcg_dump_stats(buf);
return human_readable_text_from_str(buf);
}
@@ -237,7 +32,6 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
static void hmp_tcg_register(void)
{
monitor_register_hmp_info_hrt("jit", qmp_x_query_jit);
- monitor_register_hmp_info_hrt("opcount", qmp_x_query_opcount);
}
type_init(hmp_tcg_register);
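The monitor now delegates to the shared tcg_dump_stats() (moved into tcg-stats.c below), and the separate "info opcount" command is dropped since its profiler stub printed nothing useful. A sketch of how an accelerator-agnostic caller could reach the same data via the AccelClass::get_stats hook this patch wires up in tcg-all.c (the function name here is illustrative):

/* Sketch only: generic stats dump through the get_stats hook that
 * tcg-all.c installs later in this patch. */
static void dump_current_accel_stats(GString *buf)
{
    AccelState *accel = current_accel();
    AccelClass *ac = ACCEL_GET_CLASS(accel);

    if (ac->get_stats) {
        ac->get_stats(accel, buf);
    }
}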
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
index 7e5f040..1ffcb4b 100644
--- a/accel/tcg/plugin-gen.c
+++ b/accel/tcg/plugin-gen.c
@@ -22,13 +22,12 @@
#include "qemu/osdep.h"
#include "qemu/plugin.h"
#include "qemu/log.h"
-#include "cpu.h"
#include "tcg/tcg.h"
#include "tcg/tcg-temp-internal.h"
-#include "tcg/tcg-op.h"
-#include "exec/exec-all.h"
+#include "tcg/tcg-op-common.h"
#include "exec/plugin-gen.h"
#include "exec/translator.h"
+#include "exec/translation-block.h"
enum plugin_gen_from {
PLUGIN_GEN_FROM_TB,
@@ -89,15 +88,13 @@ static void gen_enable_mem_helper(struct qemu_plugin_tb *ptb,
qemu_plugin_add_dyn_cb_arr(arr);
tcg_gen_st_ptr(tcg_constant_ptr((intptr_t)arr), tcg_env,
- offsetof(CPUState, neg.plugin_mem_cbs) -
- offsetof(ArchCPU, env));
+ offsetof(CPUState, neg.plugin_mem_cbs) - sizeof(CPUState));
}
static void gen_disable_mem_helper(void)
{
tcg_gen_st_ptr(tcg_constant_ptr(0), tcg_env,
- offsetof(CPUState, neg.plugin_mem_cbs) -
- offsetof(ArchCPU, env));
+ offsetof(CPUState, neg.plugin_mem_cbs) - sizeof(CPUState));
}
static TCGv_i32 gen_cpu_index(void)
@@ -105,25 +102,35 @@ static TCGv_i32 gen_cpu_index(void)
/*
* Optimize when we run with a single vcpu. All values using cpu_index,
* including scoreboard index, will be optimized out.
- * User-mode calls tb_flush when setting this flag. In system-mode, all
- * vcpus are created before generating code.
+ * User-mode flushes all TBs when setting this flag.
+ * In system-mode, all vcpus are created before generating code.
*/
if (!tcg_cflags_has(current_cpu, CF_PARALLEL)) {
return tcg_constant_i32(current_cpu->cpu_index);
}
TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
tcg_gen_ld_i32(cpu_index, tcg_env,
- -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
+ offsetof(CPUState, cpu_index) - sizeof(CPUState));
return cpu_index;
}
static void gen_udata_cb(struct qemu_plugin_regular_cb *cb)
{
TCGv_i32 cpu_index = gen_cpu_index();
+ enum qemu_plugin_cb_flags cb_flags =
+ tcg_call_to_qemu_plugin_cb_flags(cb->info->flags);
+ TCGv_i32 flags = tcg_constant_i32(cb_flags);
+ TCGv_i32 clear_flags = tcg_constant_i32(QEMU_PLUGIN_CB_NO_REGS);
+ tcg_gen_st_i32(flags, tcg_env,
+ offsetof(CPUState, neg.plugin_cb_flags) - sizeof(CPUState));
tcg_gen_call2(cb->f.vcpu_udata, cb->info, NULL,
tcgv_i32_temp(cpu_index),
tcgv_ptr_temp(tcg_constant_ptr(cb->userp)));
+ tcg_gen_st_i32(clear_flags, tcg_env,
+ offsetof(CPUState, neg.plugin_cb_flags) - sizeof(CPUState));
tcg_temp_free_i32(cpu_index);
+ tcg_temp_free_i32(flags);
+ tcg_temp_free_i32(clear_flags);
}
static TCGv_ptr gen_plugin_u64_ptr(qemu_plugin_u64 entry)
@@ -176,10 +183,20 @@ static void gen_udata_cond_cb(struct qemu_plugin_conditional_cb *cb)
tcg_gen_ld_i64(val, ptr, 0);
tcg_gen_brcondi_i64(cond, val, cb->imm, after_cb);
TCGv_i32 cpu_index = gen_cpu_index();
+ enum qemu_plugin_cb_flags cb_flags =
+ tcg_call_to_qemu_plugin_cb_flags(cb->info->flags);
+ TCGv_i32 flags = tcg_constant_i32(cb_flags);
+ TCGv_i32 clear_flags = tcg_constant_i32(QEMU_PLUGIN_CB_NO_REGS);
+ tcg_gen_st_i32(flags, tcg_env,
+ offsetof(CPUState, neg.plugin_cb_flags) - sizeof(CPUState));
tcg_gen_call2(cb->f.vcpu_udata, cb->info, NULL,
tcgv_i32_temp(cpu_index),
tcgv_ptr_temp(tcg_constant_ptr(cb->userp)));
+ tcg_gen_st_i32(clear_flags, tcg_env,
+ offsetof(CPUState, neg.plugin_cb_flags) - sizeof(CPUState));
tcg_temp_free_i32(cpu_index);
+ tcg_temp_free_i32(flags);
+ tcg_temp_free_i32(clear_flags);
gen_set_label(after_cb);
tcg_temp_free_i64(val);
@@ -213,12 +230,22 @@ static void gen_mem_cb(struct qemu_plugin_regular_cb *cb,
qemu_plugin_meminfo_t meminfo, TCGv_i64 addr)
{
TCGv_i32 cpu_index = gen_cpu_index();
+ enum qemu_plugin_cb_flags cb_flags =
+ tcg_call_to_qemu_plugin_cb_flags(cb->info->flags);
+ TCGv_i32 flags = tcg_constant_i32(cb_flags);
+ TCGv_i32 clear_flags = tcg_constant_i32(QEMU_PLUGIN_CB_NO_REGS);
+ tcg_gen_st_i32(flags, tcg_env,
+ offsetof(CPUState, neg.plugin_cb_flags) - sizeof(CPUState));
tcg_gen_call4(cb->f.vcpu_mem, cb->info, NULL,
tcgv_i32_temp(cpu_index),
tcgv_i32_temp(tcg_constant_i32(meminfo)),
tcgv_i64_temp(addr),
tcgv_ptr_temp(tcg_constant_ptr(cb->userp)));
+ tcg_gen_st_i32(clear_flags, tcg_env,
+ offsetof(CPUState, neg.plugin_cb_flags) - sizeof(CPUState));
tcg_temp_free_i32(cpu_index);
+ tcg_temp_free_i32(flags);
+ tcg_temp_free_i32(clear_flags);
}
static void inject_cb(struct qemu_plugin_dyn_cb *cb)
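All of the offsetof(ArchCPU, env) arithmetic above becomes offsetof(CPUState, ...) - sizeof(CPUState): tcg_env points at the arch env, which is laid out immediately after the common CPUState, so common fields sit at small negative offsets from it. A self-contained toy of that layout assumption (struct names are stand-ins, not QEMU's):

#include <assert.h>
#include <stddef.h>

typedef struct { int can_do_io; int plugin_cb_flags; } Common;
typedef struct { Common parent; long env; } Cpu;   /* env follows parent */

int main(void)
{
    Cpu c;
    char *env = (char *)&c + sizeof(Common);       /* what tcg_env holds */
    ptrdiff_t off = (ptrdiff_t)offsetof(Common, plugin_cb_flags)
                  - (ptrdiff_t)sizeof(Common);     /* negative offset */

    assert(env + off == (char *)&c.parent.plugin_cb_flags);
    return 0;
}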
diff --git a/accel/tcg/tb-hash.h b/accel/tcg/tb-hash.h
index a5382f4..f7b159f 100644
--- a/accel/tcg/tb-hash.h
+++ b/accel/tcg/tb-hash.h
@@ -20,8 +20,8 @@
#ifndef EXEC_TB_HASH_H
#define EXEC_TB_HASH_H
-#include "exec/cpu-defs.h"
-#include "exec/exec-all.h"
+#include "exec/vaddr.h"
+#include "exec/target_page.h"
#include "exec/translation-block.h"
#include "qemu/xxhash.h"
#include "tb-jmp-cache.h"
diff --git a/accel/tcg/tb-internal.h b/accel/tcg/tb-internal.h
index 68aa8d1..40439f0 100644
--- a/accel/tcg/tb-internal.h
+++ b/accel/tcg/tb-internal.h
@@ -9,8 +9,6 @@
#ifndef ACCEL_TCG_TB_INTERNAL_TARGET_H
#define ACCEL_TCG_TB_INTERNAL_TARGET_H
-#include "exec/cpu-all.h"
-#include "exec/exec-all.h"
#include "exec/translation-block.h"
/*
@@ -24,66 +22,34 @@
*/
#define GETPC_ADJ 2
-#ifdef CONFIG_SOFTMMU
-
-#define CPU_TLB_DYN_MIN_BITS 6
-#define CPU_TLB_DYN_DEFAULT_BITS 8
-
-# if HOST_LONG_BITS == 32
-/* Make sure we do not require a double-word shift for the TLB load */
-# define CPU_TLB_DYN_MAX_BITS (32 - TARGET_PAGE_BITS)
-# else /* HOST_LONG_BITS == 64 */
-/*
- * Assuming TARGET_PAGE_BITS==12, with 2**22 entries we can cover 2**(22+12) ==
- * 2**34 == 16G of address space. This is roughly what one would expect a
- * TLB to cover in a modern (as of 2018) x86_64 CPU. For instance, Intel
- * Skylake's Level-2 STLB has 16 1G entries.
- * Also, make sure we do not size the TLB past the guest's address space.
- */
-# ifdef TARGET_PAGE_BITS_VARY
-# define CPU_TLB_DYN_MAX_BITS \
- MIN(22, TARGET_VIRT_ADDR_SPACE_BITS - TARGET_PAGE_BITS)
-# else
-# define CPU_TLB_DYN_MAX_BITS \
- MIN_CONST(22, TARGET_VIRT_ADDR_SPACE_BITS - TARGET_PAGE_BITS)
-# endif
-# endif
-
-#endif /* CONFIG_SOFTMMU */
+void tb_lock_page0(tb_page_addr_t);
#ifdef CONFIG_USER_ONLY
-#include "user/page-protection.h"
/*
* For user-only, page_protect sets the page read-only.
* Since most execution is already on read-only pages, and we'd need to
* account for other TBs on the same page, defer undoing any page protection
* until we receive the write fault.
*/
-static inline void tb_lock_page0(tb_page_addr_t p0)
-{
- page_protect(p0);
-}
-
static inline void tb_lock_page1(tb_page_addr_t p0, tb_page_addr_t p1)
{
- page_protect(p1);
+ tb_lock_page0(p1);
}
static inline void tb_unlock_page1(tb_page_addr_t p0, tb_page_addr_t p1) { }
static inline void tb_unlock_pages(TranslationBlock *tb) { }
#else
-void tb_lock_page0(tb_page_addr_t);
void tb_lock_page1(tb_page_addr_t, tb_page_addr_t);
void tb_unlock_page1(tb_page_addr_t, tb_page_addr_t);
void tb_unlock_pages(TranslationBlock *);
#endif
#ifdef CONFIG_SOFTMMU
-void tb_invalidate_phys_range_fast(ram_addr_t ram_addr,
- unsigned size,
- uintptr_t retaddr);
+void tb_invalidate_phys_range_fast(CPUState *cpu, ram_addr_t ram_addr,
+ unsigned size, uintptr_t retaddr);
#endif /* CONFIG_SOFTMMU */
-bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc);
+bool tb_invalidate_phys_page_unwind(CPUState *cpu, tb_page_addr_t addr,
+ uintptr_t pc);
#endif
diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
index 3f1bebf..5a8d078 100644
--- a/accel/tcg/tb-maint.c
+++ b/accel/tcg/tb-maint.c
@@ -22,9 +22,11 @@
#include "qemu/qtree.h"
#include "exec/cputlb.h"
#include "exec/log.h"
-#include "exec/exec-all.h"
#include "exec/page-protection.h"
+#include "exec/mmap-lock.h"
#include "exec/tb-flush.h"
+#include "exec/target_page.h"
+#include "accel/tcg/cpu-ops.h"
#include "tb-internal.h"
#include "system/tcg.h"
#include "tcg/tcg.h"
@@ -32,9 +34,11 @@
#include "tb-context.h"
#include "tb-internal.h"
#include "internal-common.h"
-#include "internal-target.h"
#ifdef CONFIG_USER_ONLY
#include "user/page-protection.h"
+#define runstate_is_running() true
+#else
+#include "system/runstate.h"
#endif
@@ -87,7 +91,10 @@ static IntervalTreeRoot tb_root;
static void tb_remove_all(void)
{
- assert_memory_lock();
+ /*
+ * Only called from tb_flush__exclusive_or_serial, where we have already
+ * asserted that we're in an exclusive state.
+ */
memset(&tb_root, 0, sizeof(tb_root));
}
@@ -156,11 +163,7 @@ static PageForEachNext foreach_tb_next(PageForEachNext tb,
/*
* In system mode we want L1_MAP to be based on ram offsets.
*/
-#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
-# define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
-#else
-# define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
-#endif
+#define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
/* Size of the L2 (and L3, etc) page tables. */
#define V_L2_BITS 10
@@ -759,17 +762,19 @@ static void tb_remove(TranslationBlock *tb)
}
#endif /* CONFIG_USER_ONLY */
-/* flush all the translation blocks */
-static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
+/*
+ * Flush all the translation blocks.
+ * Must be called from a context in which no cpus are running,
+ * e.g. start_exclusive() or vm_stop().
+ */
+void tb_flush__exclusive_or_serial(void)
{
- bool did_flush = false;
+ CPUState *cpu;
- mmap_lock();
- /* If it is already been done on request of another CPU, just retry. */
- if (tb_ctx.tb_flush_count != tb_flush_count.host_int) {
- goto done;
- }
- did_flush = true;
+ assert(tcg_enabled());
+ /* Note that cpu_in_serial_context checks cpu_in_exclusive_context. */
+ assert(!runstate_is_running() ||
+ (current_cpu && cpu_in_serial_context(current_cpu)));
CPU_FOREACH(cpu) {
tcg_flush_jmp_cache(cpu);
@@ -781,25 +786,23 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
tcg_region_reset_all();
/* XXX: flush processor icache at this point if cache flush is expensive */
qatomic_inc(&tb_ctx.tb_flush_count);
+ qemu_plugin_flush_cb();
+}
-done:
- mmap_unlock();
- if (did_flush) {
- qemu_plugin_flush_cb();
+static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
+{
+ /* If it has already been done on request of another CPU, do nothing. */
+ if (tb_ctx.tb_flush_count == tb_flush_count.host_int) {
+ tb_flush__exclusive_or_serial();
}
}
-void tb_flush(CPUState *cpu)
+void queue_tb_flush(CPUState *cs)
{
if (tcg_enabled()) {
unsigned tb_flush_count = qatomic_read(&tb_ctx.tb_flush_count);
-
- if (cpu_in_serial_context(cpu)) {
- do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
- } else {
- async_safe_run_on_cpu(cpu, do_tb_flush,
- RUN_ON_CPU_HOST_INT(tb_flush_count));
- }
+ async_safe_run_on_cpu(cs, do_tb_flush,
+ RUN_ON_CPU_HOST_INT(tb_flush_count));
}
}
@@ -839,6 +842,14 @@ static inline void tb_remove_from_jmp_list(TranslationBlock *orig, int n_orig)
* We first acquired the lock, and since the destination pointer matches,
* we know for sure that @orig is in the jmp list.
*/
+ if (dest == orig) {
+ /*
+ * In the case of a TB that links to itself, removing the entry
+ * from the list means that it won't be present later during
+ * tb_jmp_unlink -- unlink now.
+ */
+ tb_reset_jump(orig, n_orig);
+ }
pprev = &dest->jmp_list_head;
TB_FOR_EACH_JMP(dest, tb, n) {
if (tb == orig && n == n_orig) {
@@ -1009,7 +1020,8 @@ TranslationBlock *tb_link_page(TranslationBlock *tb)
* Called with mmap_lock held for user-mode emulation.
* NOTE: this function must not be called while a TB is running.
*/
-void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t last)
+void tb_invalidate_phys_range(CPUState *cpu, tb_page_addr_t start,
+ tb_page_addr_t last)
{
TranslationBlock *tb;
PageForEachNext n;
@@ -1032,17 +1044,16 @@ static void tb_invalidate_phys_page(tb_page_addr_t addr)
start = addr & TARGET_PAGE_MASK;
last = addr | ~TARGET_PAGE_MASK;
- tb_invalidate_phys_range(start, last);
+ tb_invalidate_phys_range(NULL, start, last);
}
/*
* Called with mmap_lock held. If pc is not 0 then it indicates the
* host PC of the faulting store instruction that caused this invalidate.
- * Returns true if the caller needs to abort execution of the current
- * TB (because it was modified by this store and the guest CPU has
- * precise-SMC semantics).
+ * Returns true if the caller needs to abort execution of the current TB.
*/
-bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc)
+bool tb_invalidate_phys_page_unwind(CPUState *cpu, tb_page_addr_t addr,
+ uintptr_t pc)
{
TranslationBlock *current_tb;
bool current_tb_modified;
@@ -1054,10 +1065,7 @@ bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc)
* Without precise smc semantics, or when outside of a TB,
* we can skip to invalidate.
*/
-#ifndef TARGET_HAS_PRECISE_SMC
- pc = 0;
-#endif
- if (!pc) {
+ if (!pc || !cpu || !cpu->cc->tcg_ops->precise_smc) {
tb_invalidate_phys_page(addr);
return false;
}
@@ -1080,15 +1088,14 @@ bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc)
* the CPU state.
*/
current_tb_modified = true;
- cpu_restore_state_from_tb(current_cpu, current_tb, pc);
+ cpu_restore_state_from_tb(cpu, current_tb, pc);
}
tb_phys_invalidate__locked(tb);
}
if (current_tb_modified) {
/* Force execution of one insn next time. */
- CPUState *cpu = current_cpu;
- cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu);
+ cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu);
return true;
}
return false;
@@ -1097,23 +1104,28 @@ bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc)
/*
* @p must be non-NULL.
* Call with all @pages locked.
+ * (@cpu, @retaddr) may be (NULL, 0) outside of a cpu context,
+ * in which case precise_smc need not be detected.
*/
static void
-tb_invalidate_phys_page_range__locked(struct page_collection *pages,
+tb_invalidate_phys_page_range__locked(CPUState *cpu,
+ struct page_collection *pages,
PageDesc *p, tb_page_addr_t start,
tb_page_addr_t last,
uintptr_t retaddr)
{
TranslationBlock *tb;
PageForEachNext n;
-#ifdef TARGET_HAS_PRECISE_SMC
bool current_tb_modified = false;
- TranslationBlock *current_tb = retaddr ? tcg_tb_lookup(retaddr) : NULL;
-#endif /* TARGET_HAS_PRECISE_SMC */
+ TranslationBlock *current_tb = NULL;
/* Range may not cross a page. */
tcg_debug_assert(((start ^ last) & TARGET_PAGE_MASK) == 0);
+ if (retaddr && cpu && cpu->cc->tcg_ops->precise_smc) {
+ current_tb = tcg_tb_lookup(retaddr);
+ }
+
/*
* We remove all the TBs in the range [start, last].
* XXX: see if in some cases it could be faster to invalidate all the code
@@ -1131,8 +1143,7 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages,
tb_last = tb_start + (tb_last & ~TARGET_PAGE_MASK);
}
if (!(tb_last < start || tb_start > last)) {
-#ifdef TARGET_HAS_PRECISE_SMC
- if (current_tb == tb &&
+ if (unlikely(current_tb == tb) &&
(tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
/*
* If we are modifying the current TB, we must stop
@@ -1142,9 +1153,8 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages,
* restore the CPU state.
*/
current_tb_modified = true;
- cpu_restore_state_from_tb(current_cpu, current_tb, retaddr);
+ cpu_restore_state_from_tb(cpu, current_tb, retaddr);
}
-#endif /* TARGET_HAS_PRECISE_SMC */
tb_phys_invalidate__locked(tb);
}
}
@@ -1154,15 +1164,12 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages,
tlb_unprotect_code(start);
}
-#ifdef TARGET_HAS_PRECISE_SMC
- if (current_tb_modified) {
+ if (unlikely(current_tb_modified)) {
page_collection_unlock(pages);
/* Force execution of one insn next time. */
- current_cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu);
- mmap_unlock();
- cpu_loop_exit_noexc(current_cpu);
+ cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu);
+ cpu_loop_exit_noexc(cpu);
}
-#endif
}
/*
@@ -1172,7 +1179,8 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages,
* access: the virtual CPU will exit the current TB if code is modified inside
* this TB.
*/
-void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t last)
+void tb_invalidate_phys_range(CPUState *cpu, tb_page_addr_t start,
+ tb_page_addr_t last)
{
struct page_collection *pages;
tb_page_addr_t index, index_last;
@@ -1191,44 +1199,30 @@ void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t last)
page_start = index << TARGET_PAGE_BITS;
page_last = page_start | ~TARGET_PAGE_MASK;
page_last = MIN(page_last, last);
- tb_invalidate_phys_page_range__locked(pages, pd,
+ tb_invalidate_phys_page_range__locked(cpu, pages, pd,
page_start, page_last, 0);
}
page_collection_unlock(pages);
}
/*
- * Call with all @pages in the range [@start, @start + len[ locked.
- */
-static void tb_invalidate_phys_page_fast__locked(struct page_collection *pages,
- tb_page_addr_t start,
- unsigned len, uintptr_t ra)
-{
- PageDesc *p;
-
- p = page_find(start >> TARGET_PAGE_BITS);
- if (!p) {
- return;
- }
-
- assert_page_locked(p);
- tb_invalidate_phys_page_range__locked(pages, p, start, start + len - 1, ra);
-}
-
-/*
* len must be <= 8 and start must be a multiple of len.
* Called via softmmu_template.h when code areas are written to with
* iothread mutex not held.
*/
-void tb_invalidate_phys_range_fast(ram_addr_t ram_addr,
- unsigned size,
- uintptr_t retaddr)
+void tb_invalidate_phys_range_fast(CPUState *cpu, ram_addr_t start,
+ unsigned len, uintptr_t ra)
{
- struct page_collection *pages;
+ PageDesc *p = page_find(start >> TARGET_PAGE_BITS);
- pages = page_collection_lock(ram_addr, ram_addr + size - 1);
- tb_invalidate_phys_page_fast__locked(pages, ram_addr, size, retaddr);
- page_collection_unlock(pages);
+ if (p) {
+ ram_addr_t last = start + len - 1;
+ struct page_collection *pages = page_collection_lock(start, last);
+
+ tb_invalidate_phys_page_range__locked(cpu, pages, p,
+ start, last, ra);
+ page_collection_unlock(pages);
+ }
}
#endif /* CONFIG_USER_ONLY */
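The tb-maint.c changes split the old tb_flush() in two: tb_flush__exclusive_or_serial() does the work and asserts it cannot race, while queue_tb_flush() defers it via async_safe_run_on_cpu(). The resulting caller pattern, as used by tb_gen_code() later in this patch (the wrapper name here is illustrative):

/* Caller-side sketch of the new split. */
static void request_tb_flush(CPUState *cpu)
{
    if (cpu_in_serial_context(cpu)) {
        tb_flush__exclusive_or_serial();    /* safe to flush right now */
    } else {
        queue_tb_flush(cpu);                /* flush once it is safe */
    }
}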
diff --git a/accel/tcg/tcg-accel-ops-icount.c b/accel/tcg/tcg-accel-ops-icount.c
index 27cf104..d0f7b41 100644
--- a/accel/tcg/tcg-accel-ops-icount.c
+++ b/accel/tcg/tcg-accel-ops-icount.c
@@ -25,7 +25,7 @@
#include "qemu/osdep.h"
#include "system/replay.h"
-#include "system/cpu-timers.h"
+#include "exec/icount.h"
#include "qemu/main-loop.h"
#include "qemu/guest-random.h"
#include "hw/core/cpu.h"
diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c
index bdcc385..cf1ee7a 100644
--- a/accel/tcg/tcg-accel-ops-mttcg.c
+++ b/accel/tcg/tcg-accel-ops-mttcg.c
@@ -26,7 +26,7 @@
#include "qemu/osdep.h"
#include "system/tcg.h"
#include "system/replay.h"
-#include "system/cpu-timers.h"
+#include "exec/icount.h"
#include "qemu/main-loop.h"
#include "qemu/notify.h"
#include "qemu/guest-random.h"
@@ -84,10 +84,9 @@ static void *mttcg_cpu_thread_fn(void *arg)
cpu_thread_signal_created(cpu);
qemu_guest_random_seed_thread_part2(cpu->random_seed);
- /* process any pending work */
- cpu->exit_request = 1;
-
do {
+ qemu_process_cpu_events(cpu);
+
if (cpu_can_run(cpu)) {
int r;
bql_unlock();
@@ -112,9 +111,6 @@ static void *mttcg_cpu_thread_fn(void *arg)
break;
}
}
-
- qatomic_set_mb(&cpu->exit_request, 0);
- qemu_wait_io_event(cpu);
} while (!cpu->unplug || cpu_can_run(cpu));
tcg_cpu_destroy(cpu);
@@ -124,11 +120,6 @@ static void *mttcg_cpu_thread_fn(void *arg)
return NULL;
}
-void mttcg_kick_vcpu_thread(CPUState *cpu)
-{
- cpu_exit(cpu);
-}
-
void mttcg_start_vcpu_thread(CPUState *cpu)
{
char thread_name[VCPU_THREAD_NAME_SIZE];
diff --git a/accel/tcg/tcg-accel-ops-mttcg.h b/accel/tcg/tcg-accel-ops-mttcg.h
index 8ffa7a9..5c145cc 100644
--- a/accel/tcg/tcg-accel-ops-mttcg.h
+++ b/accel/tcg/tcg-accel-ops-mttcg.h
@@ -10,9 +10,6 @@
#ifndef TCG_ACCEL_OPS_MTTCG_H
#define TCG_ACCEL_OPS_MTTCG_H
-/* kick MTTCG vCPU thread */
-void mttcg_kick_vcpu_thread(CPUState *cpu);
-
/* start an mttcg vCPU thread */
void mttcg_start_vcpu_thread(CPUState *cpu);
diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
index f62cf24..2fb4643 100644
--- a/accel/tcg/tcg-accel-ops-rr.c
+++ b/accel/tcg/tcg-accel-ops-rr.c
@@ -27,7 +27,7 @@
#include "qemu/lockable.h"
#include "system/tcg.h"
#include "system/replay.h"
-#include "system/cpu-timers.h"
+#include "exec/icount.h"
#include "qemu/main-loop.h"
#include "qemu/notify.h"
#include "qemu/guest-random.h"
@@ -43,7 +43,7 @@ void rr_kick_vcpu_thread(CPUState *unused)
CPUState *cpu;
CPU_FOREACH(cpu) {
- cpu_exit(cpu);
+ tcg_kick_vcpu_thread(cpu);
};
}
@@ -117,7 +117,7 @@ static void rr_wait_io_event(void)
rr_start_kick_timer();
CPU_FOREACH(cpu) {
- qemu_wait_io_event_common(cpu);
+ qemu_process_cpu_events_common(cpu);
}
}
@@ -203,7 +203,7 @@ static void *rr_cpu_thread_fn(void *arg)
/* process any pending work */
CPU_FOREACH(cpu) {
current_cpu = cpu;
- qemu_wait_io_event_common(cpu);
+ qemu_process_cpu_events_common(cpu);
}
}
@@ -211,13 +211,30 @@ static void *rr_cpu_thread_fn(void *arg)
cpu = first_cpu;
- /* process any pending work */
- cpu->exit_request = 1;
-
while (1) {
/* Only used for icount_enabled() */
int64_t cpu_budget = 0;
+ if (cpu) {
+ /*
+ * This could even reset exit_request for all CPUs, but in practice
+ * races between CPU exits and changes to "cpu" are so rare that
+ * there's no advantage in doing so.
+ */
+ qatomic_set(&cpu->exit_request, false);
+ }
+
+ if (icount_enabled() && all_cpu_threads_idle()) {
+ /*
+ * When all cpus are sleeping (e.g. in WFI), to avoid a deadlock
+ * in the main_loop, wake it up in order to start the warp timer.
+ */
+ qemu_notify_event();
+ }
+
+ rr_wait_io_event();
+ rr_deal_with_unplugged_cpus();
+
bql_unlock();
replay_mutex_lock();
bql_lock();
@@ -242,10 +259,17 @@ static void *rr_cpu_thread_fn(void *arg)
cpu = first_cpu;
}
- while (cpu && cpu_work_list_empty(cpu) && !cpu->exit_request) {
- /* Store rr_current_cpu before evaluating cpu_can_run(). */
+ while (cpu && cpu_work_list_empty(cpu)) {
+ /*
+ * Store rr_current_cpu before evaluating cpu->exit_request.
+ * Pairs with rr_kick_next_cpu().
+ */
qatomic_set_mb(&rr_current_cpu, cpu);
+ /* Pairs with store-release in cpu_exit. */
+ if (qatomic_load_acquire(&cpu->exit_request)) {
+ break;
+ }
current_cpu = cpu;
qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
@@ -285,21 +309,6 @@ static void *rr_cpu_thread_fn(void *arg)
/* Does not need a memory barrier because a spurious wakeup is okay. */
qatomic_set(&rr_current_cpu, NULL);
-
- if (cpu && cpu->exit_request) {
- qatomic_set_mb(&cpu->exit_request, 0);
- }
-
- if (icount_enabled() && all_cpu_threads_idle()) {
- /*
- * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
- * in the main_loop, wake it up in order to start the warp timer.
- */
- qemu_notify_event();
- }
-
- rr_wait_io_event();
- rr_deal_with_unplugged_cpus();
}
g_assert_not_reached();
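The rr loop now reads exit_request with a load-acquire, pairing with a store-release in cpu_exit() per the new comment. A minimal standalone sketch of that pairing (names are illustrative, using C11 atomics rather than QEMU's qatomic wrappers):

#include <stdatomic.h>
#include <stdbool.h>

static _Atomic bool exit_request;

static void kicker(void)
{
    /* ... publish work for the vcpu ... */
    atomic_store_explicit(&exit_request, true, memory_order_release);
}

static bool vcpu_should_break(void)
{
    /* Pairs with the release store above: if this reads true, the
     * work published before the store is visible too. */
    return atomic_load_explicit(&exit_request, memory_order_acquire);
}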
diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c
index d9b662e..3bd9800 100644
--- a/accel/tcg/tcg-accel-ops.c
+++ b/accel/tcg/tcg-accel-ops.c
@@ -26,10 +26,11 @@
*/
#include "qemu/osdep.h"
-#include "system/accel-ops.h"
+#include "accel/accel-ops.h"
+#include "accel/accel-cpu-ops.h"
#include "system/tcg.h"
#include "system/replay.h"
-#include "system/cpu-timers.h"
+#include "exec/icount.h"
#include "qemu/main-loop.h"
#include "qemu/guest-random.h"
#include "qemu/timer.h"
@@ -37,6 +38,7 @@
#include "exec/hwaddr.h"
#include "exec/tb-flush.h"
#include "exec/translation-block.h"
+#include "exec/watchpoint.h"
#include "gdbstub/enums.h"
#include "hw/core/cpu.h"
@@ -79,6 +81,7 @@ int tcg_cpu_exec(CPUState *cpu)
cpu_exec_start(cpu);
ret = cpu_exec(cpu);
cpu_exec_end(cpu);
+
return ret;
}
@@ -92,9 +95,7 @@ static void tcg_cpu_reset_hold(CPUState *cpu)
/* mask must never be zero, except for A20 change call */
void tcg_handle_interrupt(CPUState *cpu, int mask)
{
- g_assert(bql_locked());
-
- cpu->interrupt_request |= mask;
+ cpu_set_interrupt(cpu, mask);
/*
* If called from iothread context, wake the target cpu in
@@ -197,11 +198,13 @@ static inline void tcg_remove_all_breakpoints(CPUState *cpu)
cpu_watchpoint_remove_all(cpu, BP_GDB);
}
-static void tcg_accel_ops_init(AccelOpsClass *ops)
+static void tcg_accel_ops_init(AccelClass *ac)
{
+ AccelOpsClass *ops = ac->ops;
+
if (qemu_tcg_mttcg_enabled()) {
ops->create_vcpu_thread = mttcg_start_vcpu_thread;
- ops->kick_vcpu_thread = mttcg_kick_vcpu_thread;
+ ops->kick_vcpu_thread = tcg_kick_vcpu_thread;
ops->handle_interrupt = tcg_handle_interrupt;
} else {
ops->create_vcpu_thread = rr_start_vcpu_thread;
@@ -223,7 +226,7 @@ static void tcg_accel_ops_init(AccelOpsClass *ops)
ops->remove_all_breakpoints = tcg_remove_all_breakpoints;
}
-static void tcg_accel_ops_class_init(ObjectClass *oc, void *data)
+static void tcg_accel_ops_class_init(ObjectClass *oc, const void *data)
{
AccelOpsClass *ops = ACCEL_OPS_CLASS(oc);
diff --git a/accel/tcg/tcg-accel-ops.h b/accel/tcg/tcg-accel-ops.h
index 6feeb3f..aecce60 100644
--- a/accel/tcg/tcg-accel-ops.h
+++ b/accel/tcg/tcg-accel-ops.h
@@ -18,5 +18,6 @@ void tcg_cpu_destroy(CPUState *cpu);
int tcg_cpu_exec(CPUState *cpu);
void tcg_handle_interrupt(CPUState *cpu, int mask);
void tcg_cpu_init_cflags(CPUState *cpu, bool parallel);
+void tcg_kick_vcpu_thread(CPUState *cpu);
#endif /* TCG_ACCEL_OPS_H */
diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c
index c1a30b0..18ea0c5 100644
--- a/accel/tcg/tcg-all.c
+++ b/accel/tcg/tcg-all.c
@@ -26,27 +26,31 @@
#include "qemu/osdep.h"
#include "system/tcg.h"
#include "exec/replay-core.h"
-#include "system/cpu-timers.h"
+#include "exec/icount.h"
#include "tcg/startup.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/accel.h"
#include "qemu/atomic.h"
+#include "qapi/qapi-types-common.h"
#include "qapi/qapi-builtin-visit.h"
#include "qemu/units.h"
-#if defined(CONFIG_USER_ONLY)
-#include "hw/qdev-core.h"
-#else
+#include "qemu/target-info.h"
+#ifndef CONFIG_USER_ONLY
#include "hw/boards.h"
+#include "exec/tb-flush.h"
+#include "system/runstate.h"
#endif
+#include "accel/accel-ops.h"
+#include "accel/accel-cpu-ops.h"
+#include "accel/tcg/cpu-ops.h"
#include "internal-common.h"
-#include "cpu-param.h"
struct TCGState {
AccelState parent_obj;
- bool mttcg_enabled;
+ OnOffAuto mttcg_enabled;
bool one_insn_per_tb;
int splitwx_enabled;
unsigned long tb_size;
@@ -58,40 +62,18 @@ typedef struct TCGState TCGState;
DECLARE_INSTANCE_CHECKER(TCGState, TCG_STATE,
TYPE_TCG_ACCEL)
-/*
- * We default to false if we know other options have been enabled
- * which are currently incompatible with MTTCG. Otherwise when each
- * guest (target) has been updated to support:
- * - atomic instructions
- * - memory ordering primitives (barriers)
- * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
- *
- * Once a guest architecture has been converted to the new primitives
- * there is one remaining limitation to check:
- * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
- */
-
-static bool default_mttcg_enabled(void)
+#ifndef CONFIG_USER_ONLY
+bool qemu_tcg_mttcg_enabled(void)
{
- if (icount_enabled()) {
- return false;
- }
-#ifdef TARGET_SUPPORTS_MTTCG
-# ifndef TCG_GUEST_DEFAULT_MO
-# error "TARGET_SUPPORTS_MTTCG without TCG_GUEST_DEFAULT_MO"
-# endif
- return true;
-#else
- return false;
-#endif
+ TCGState *s = TCG_STATE(current_accel());
+ return s->mttcg_enabled == ON_OFF_AUTO_ON;
}
+#endif /* !CONFIG_USER_ONLY */
static void tcg_accel_instance_init(Object *obj)
{
TCGState *s = TCG_STATE(obj);
- s->mttcg_enabled = default_mttcg_enabled();
-
/* If debugging enabled, default "auto on", otherwise off. */
#if defined(CONFIG_DEBUG_TCG) && !defined(CONFIG_USER_ONLY)
s->splitwx_enabled = -1;
@@ -100,24 +82,76 @@ static void tcg_accel_instance_init(Object *obj)
#endif
}
-bool mttcg_enabled;
bool one_insn_per_tb;
-static int tcg_init_machine(MachineState *ms)
+#ifndef CONFIG_USER_ONLY
+static void tcg_vm_change_state(void *opaque, bool running, RunState state)
{
- TCGState *s = TCG_STATE(current_accel());
-#ifdef CONFIG_USER_ONLY
- unsigned max_cpus = 1;
-#else
- unsigned max_cpus = ms->smp.max_cpus;
+ if (state == RUN_STATE_RESTORE_VM) {
+ /*
+ * loadvm will update the content of RAM, bypassing the usual
+ * mechanisms that ensure we flush TBs for writes to memory
+ * we've translated code from, so we must flush all TBs.
+ *
+ * vm_stop() has just stopped all cpus, so we are exclusive.
+ */
+ assert(!running);
+ tb_flush__exclusive_or_serial();
+ }
+}
+#endif
+
+static int tcg_init_machine(AccelState *as, MachineState *ms)
+{
+ TCGState *s = TCG_STATE(as);
+ unsigned max_threads = 1;
+
+#ifndef CONFIG_USER_ONLY
+ CPUClass *cc = CPU_CLASS(object_class_by_name(target_cpu_type()));
+ bool mttcg_supported = cc->tcg_ops->mttcg_supported;
+
+ switch (s->mttcg_enabled) {
+ case ON_OFF_AUTO_AUTO:
+ /*
+ * We default to false if we know other options have been enabled
+ * which are currently incompatible with MTTCG. Otherwise when each
+ * guest (target) has been updated to support:
+ * - atomic instructions
+ * - memory ordering primitives (barriers)
+ * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
+ *
+ * Once a guest architecture has been converted to the new primitives
+ * there is one remaining limitation to check:
+ * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
+ */
+ if (mttcg_supported && !icount_enabled()) {
+ s->mttcg_enabled = ON_OFF_AUTO_ON;
+ max_threads = ms->smp.max_cpus;
+ } else {
+ s->mttcg_enabled = ON_OFF_AUTO_OFF;
+ }
+ break;
+ case ON_OFF_AUTO_ON:
+ if (!mttcg_supported) {
+ warn_report("Guest not yet converted to MTTCG - "
+ "you may get unexpected results");
+ }
+ max_threads = ms->smp.max_cpus;
+ break;
+ case ON_OFF_AUTO_OFF:
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ qemu_add_vm_change_state_handler(tcg_vm_change_state, NULL);
#endif
tcg_allowed = true;
- mttcg_enabled = s->mttcg_enabled;
page_init();
tb_htable_init();
- tcg_init(s->tb_size * MiB, s->splitwx_enabled, max_cpus);
+ tcg_init(s->tb_size * MiB, s->splitwx_enabled, max_threads);
#if defined(CONFIG_SOFTMMU)
/*
@@ -138,7 +172,7 @@ static char *tcg_get_thread(Object *obj, Error **errp)
{
TCGState *s = TCG_STATE(obj);
- return g_strdup(s->mttcg_enabled ? "multi" : "single");
+ return g_strdup(s->mttcg_enabled == ON_OFF_AUTO_ON ? "multi" : "single");
}
static void tcg_set_thread(Object *obj, const char *value, Error **errp)
@@ -149,14 +183,10 @@ static void tcg_set_thread(Object *obj, const char *value, Error **errp)
if (icount_enabled()) {
error_setg(errp, "No MTTCG when icount is enabled");
} else {
-#ifndef TARGET_SUPPORTS_MTTCG
- warn_report("Guest not yet converted to MTTCG - "
- "you may get unexpected results");
-#endif
- s->mttcg_enabled = true;
+ s->mttcg_enabled = ON_OFF_AUTO_ON;
}
} else if (strcmp(value, "single") == 0) {
- s->mttcg_enabled = false;
+ s->mttcg_enabled = ON_OFF_AUTO_OFF;
} else {
error_setg(errp, "Invalid 'thread' setting %s", value);
}
@@ -212,7 +242,7 @@ static void tcg_set_one_insn_per_tb(Object *obj, bool value, Error **errp)
qatomic_set(&one_insn_per_tb, value);
}
-static int tcg_gdbstub_supported_sstep_flags(void)
+static int tcg_gdbstub_supported_sstep_flags(AccelState *as)
{
/*
* In replay mode all events will come from the log and can't be
@@ -227,13 +257,14 @@ static int tcg_gdbstub_supported_sstep_flags(void)
}
}
-static void tcg_accel_class_init(ObjectClass *oc, void *data)
+static void tcg_accel_class_init(ObjectClass *oc, const void *data)
{
AccelClass *ac = ACCEL_CLASS(oc);
ac->name = "tcg";
ac->init_machine = tcg_init_machine;
ac->cpu_common_realize = tcg_exec_realizefn;
ac->cpu_common_unrealize = tcg_exec_unrealizefn;
+ ac->get_stats = tcg_get_stats;
ac->allowed = &tcg_allowed;
ac->gdbstub_supported_sstep_flags = tcg_gdbstub_supported_sstep_flags;
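mttcg_enabled changes from bool to OnOffAuto so that "auto" can be resolved only once the target CPU class, and thus tcg_ops->mttcg_supported, is known at machine-init time. An illustrative condensation of the switch in tcg_init_machine() above:

/* Condensed form of the ON_OFF_AUTO resolution; not the literal code. */
static bool resolve_mttcg(OnOffAuto requested, bool supported, bool icount)
{
    if (requested == ON_OFF_AUTO_AUTO) {
        return supported && !icount;    /* default on only when safe */
    }
    return requested == ON_OFF_AUTO_ON; /* an explicit setting wins */
}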
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index c23b5e6..8436599 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -63,6 +63,18 @@ DEF_HELPER_FLAGS_5(atomic_cmpxchgo_be, TCG_CALL_NO_WG,
i128, env, i64, i128, i128, i32)
DEF_HELPER_FLAGS_5(atomic_cmpxchgo_le, TCG_CALL_NO_WG,
i128, env, i64, i128, i128, i32)
+DEF_HELPER_FLAGS_4(atomic_xchgo_be, TCG_CALL_NO_WG,
+ i128, env, i64, i128, i32)
+DEF_HELPER_FLAGS_4(atomic_xchgo_le, TCG_CALL_NO_WG,
+ i128, env, i64, i128, i32)
+DEF_HELPER_FLAGS_4(atomic_fetch_ando_be, TCG_CALL_NO_WG,
+ i128, env, i64, i128, i32)
+DEF_HELPER_FLAGS_4(atomic_fetch_ando_le, TCG_CALL_NO_WG,
+ i128, env, i64, i128, i32)
+DEF_HELPER_FLAGS_4(atomic_fetch_oro_be, TCG_CALL_NO_WG,
+ i128, env, i64, i128, i32)
+DEF_HELPER_FLAGS_4(atomic_fetch_oro_le, TCG_CALL_NO_WG,
+ i128, env, i64, i128, i32)
#endif
DEF_HELPER_FLAGS_5(nonatomic_cmpxchgo, TCG_CALL_NO_WG,
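The new i128 helpers (xchg, fetch-and, fetch-or) extend the existing cmpxchgo pair. When the host has no native 128-bit read-modify-write, such fetch-ops are typically reducible to a compare-and-swap loop; a sketch of the idea only, not QEMU's atomic_template.h implementation:

typedef unsigned __int128 u128;

/* Sketch: 128-bit fetch-and via a CAS loop (GCC/Clang builtins). */
static u128 fetch_and_u128(u128 *ptr, u128 val)
{
    u128 old = __atomic_load_n(ptr, __ATOMIC_RELAXED);

    while (!__atomic_compare_exchange_n(ptr, &old, old & val,
                                        true, __ATOMIC_SEQ_CST,
                                        __ATOMIC_RELAXED)) {
        /* 'old' was refreshed by the failed CAS; retry. */
    }
    return old;
}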
diff --git a/accel/tcg/tcg-stats.c b/accel/tcg/tcg-stats.c
new file mode 100644
index 0000000..ced5dec
--- /dev/null
+++ b/accel/tcg/tcg-stats.c
@@ -0,0 +1,219 @@
+/*
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ *
+ * QEMU TCG statistics
+ *
+ * Copyright (c) 2003-2005 Fabrice Bellard
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/accel.h"
+#include "qemu/qht.h"
+#include "qapi/error.h"
+#include "system/cpu-timers.h"
+#include "exec/icount.h"
+#include "hw/core/cpu.h"
+#include "tcg/tcg.h"
+#include "internal-common.h"
+#include "tb-context.h"
+#include <math.h>
+
+static void dump_drift_info(GString *buf)
+{
+ if (!icount_enabled()) {
+ return;
+ }
+
+ g_string_append_printf(buf, "Host - Guest clock %"PRIi64" ms\n",
+ (cpu_get_clock() - icount_get()) / SCALE_MS);
+ if (icount_align_option) {
+ g_string_append_printf(buf, "Max guest delay %"PRIi64" ms\n",
+ -max_delay / SCALE_MS);
+ g_string_append_printf(buf, "Max guest advance %"PRIi64" ms\n",
+ max_advance / SCALE_MS);
+ } else {
+ g_string_append_printf(buf, "Max guest delay NA\n");
+ g_string_append_printf(buf, "Max guest advance NA\n");
+ }
+}
+
+static void dump_accel_info(AccelState *accel, GString *buf)
+{
+ bool one_insn_per_tb = object_property_get_bool(OBJECT(accel),
+ "one-insn-per-tb",
+ &error_fatal);
+
+ g_string_append_printf(buf, "Accelerator settings:\n");
+ g_string_append_printf(buf, "one-insn-per-tb: %s\n\n",
+ one_insn_per_tb ? "on" : "off");
+}
+
+static void print_qht_statistics(struct qht_stats hst, GString *buf)
+{
+ uint32_t hgram_opts;
+ size_t hgram_bins;
+ char *hgram;
+ double avg;
+
+ if (!hst.head_buckets) {
+ return;
+ }
+ g_string_append_printf(buf, "TB hash buckets %zu/%zu "
+ "(%0.2f%% head buckets used)\n",
+ hst.used_head_buckets, hst.head_buckets,
+ (double)hst.used_head_buckets /
+ hst.head_buckets * 100);
+
+ hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
+ hgram_opts |= QDIST_PR_100X | QDIST_PR_PERCENT;
+ if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
+ hgram_opts |= QDIST_PR_NODECIMAL;
+ }
+ hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
+ avg = qdist_avg(&hst.occupancy);
+ if (!isnan(avg)) {
+ g_string_append_printf(buf, "TB hash occupancy "
+ "%0.2f%% avg chain occ. "
+ "Histogram: %s\n",
+ avg * 100, hgram);
+ }
+ g_free(hgram);
+
+ hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
+ hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
+ if (hgram_bins > 10) {
+ hgram_bins = 10;
+ } else {
+ hgram_bins = 0;
+ hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
+ }
+ hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
+ avg = qdist_avg(&hst.chain);
+ if (!isnan(avg)) {
+ g_string_append_printf(buf, "TB hash avg chain %0.3f buckets. "
+ "Histogram: %s\n",
+ avg, hgram);
+ }
+ g_free(hgram);
+}
+
+struct tb_tree_stats {
+ size_t nb_tbs;
+ size_t host_size;
+ size_t target_size;
+ size_t max_target_size;
+ size_t direct_jmp_count;
+ size_t direct_jmp2_count;
+ size_t cross_page;
+};
+
+static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
+{
+ const TranslationBlock *tb = value;
+ struct tb_tree_stats *tst = data;
+
+ tst->nb_tbs++;
+ tst->host_size += tb->tc.size;
+ tst->target_size += tb->size;
+ if (tb->size > tst->max_target_size) {
+ tst->max_target_size = tb->size;
+ }
+#ifndef CONFIG_USER_ONLY
+ if (tb->page_addr[1] != -1) {
+ tst->cross_page++;
+ }
+#endif
+ if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
+ tst->direct_jmp_count++;
+ if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
+ tst->direct_jmp2_count++;
+ }
+ }
+ return false;
+}
+
+static void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
+{
+ CPUState *cpu;
+ size_t full = 0, part = 0, elide = 0;
+
+ CPU_FOREACH(cpu) {
+ full += qatomic_read(&cpu->neg.tlb.c.full_flush_count);
+ part += qatomic_read(&cpu->neg.tlb.c.part_flush_count);
+ elide += qatomic_read(&cpu->neg.tlb.c.elide_flush_count);
+ }
+ *pfull = full;
+ *ppart = part;
+ *pelide = elide;
+}
+
+static void tcg_dump_flush_info(GString *buf)
+{
+ size_t flush_full, flush_part, flush_elide;
+
+ g_string_append_printf(buf, "TB flush count %u\n",
+ qatomic_read(&tb_ctx.tb_flush_count));
+ g_string_append_printf(buf, "TB invalidate count %u\n",
+ qatomic_read(&tb_ctx.tb_phys_invalidate_count));
+
+ tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
+ g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full);
+ g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
+ g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide);
+}
+
+static void dump_exec_info(GString *buf)
+{
+ struct tb_tree_stats tst = {};
+ struct qht_stats hst;
+ size_t nb_tbs;
+
+ tcg_tb_foreach(tb_tree_stats_iter, &tst);
+ nb_tbs = tst.nb_tbs;
+ /* XXX: avoid using doubles? */
+ g_string_append_printf(buf, "Translation buffer state:\n");
+ /*
+ * Report total code size including the padding and TB structs;
+ * otherwise users might think "-accel tcg,tb-size" is not honoured.
+ * For avg host size we use the precise numbers from tb_tree_stats though.
+ */
+ g_string_append_printf(buf, "gen code size %zu/%zu\n",
+ tcg_code_size(), tcg_code_capacity());
+ g_string_append_printf(buf, "TB count %zu\n", nb_tbs);
+ g_string_append_printf(buf, "TB avg target size %zu max=%zu bytes\n",
+ nb_tbs ? tst.target_size / nb_tbs : 0,
+ tst.max_target_size);
+ g_string_append_printf(buf, "TB avg host size %zu bytes "
+ "(expansion ratio: %0.1f)\n",
+ nb_tbs ? tst.host_size / nb_tbs : 0,
+ tst.target_size ?
+ (double)tst.host_size / tst.target_size : 0);
+ g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
+ tst.cross_page,
+ nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
+ g_string_append_printf(buf, "direct jump count %zu (%zu%%) "
+ "(2 jumps=%zu %zu%%)\n",
+ tst.direct_jmp_count,
+ nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
+ tst.direct_jmp2_count,
+ nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
+
+ qht_statistics_init(&tb_ctx.htable, &hst);
+ print_qht_statistics(hst, buf);
+ qht_statistics_destroy(&hst);
+
+ g_string_append_printf(buf, "\nStatistics:\n");
+ tcg_dump_flush_info(buf);
+}
+
+void tcg_get_stats(AccelState *accel, GString *buf)
+{
+ dump_accel_info(accel, buf);
+ dump_exec_info(buf);
+ dump_drift_info(buf);
+}
+
+void tcg_dump_stats(GString *buf)
+{
+ tcg_get_stats(current_accel(), buf);
+}
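One behavioral change in the move: print_qht_statistics() now guards the histogram lines with isnan(), presumably because qdist_avg() of an empty distribution divides by a zero weight sum. A toy of the failure mode being guarded against:

#include <math.h>
#include <stdio.h>

int main(void)
{
    double sum = 0.0, count = 0.0;
    double avg = sum / count;           /* 0.0/0.0 yields NaN */

    if (!isnan(avg)) {
        printf("avg %0.2f\n", avg);     /* skipped, as in the patch */
    }
    return 0;
}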
diff --git a/accel/tcg/tlb-bounds.h b/accel/tcg/tlb-bounds.h
new file mode 100644
index 0000000..f83d9ac
--- /dev/null
+++ b/accel/tcg/tlb-bounds.h
@@ -0,0 +1,13 @@
+/*
+ * softmmu size bounds
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#ifndef ACCEL_TCG_TLB_BOUNDS_H
+#define ACCEL_TCG_TLB_BOUNDS_H
+
+#define CPU_TLB_DYN_MIN_BITS 6
+#define CPU_TLB_DYN_MAX_BITS (32 - TARGET_PAGE_BITS)
+#define CPU_TLB_DYN_DEFAULT_BITS 8
+
+#endif /* ACCEL_TCG_TLB_BOUNDS_H */
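tlb-bounds.h replaces the per-target CPU_TLB_DYN_MAX_BITS computation with a flat 32 - TARGET_PAGE_BITS bound, dropping the 22-bit cap that the removed tb-internal.h comment justified. For reference, a standalone check of the arithmetic behind that old comment:

#include <stdio.h>

int main(void)
{
    int page_bits = 12;                     /* TARGET_PAGE_BITS == 12 */
    int tlb_bits = 22;                      /* the old 64-bit-host cap */
    unsigned long long covered = 1ULL << (tlb_bits + page_bits);

    printf("%llu GiB\n", covered >> 30);    /* prints 16, matching the
                                               removed comment */
    return 0;
}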
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 82bc16b..da9d7f1 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -21,49 +21,20 @@
#include "trace.h"
#include "disas/disas.h"
-#include "exec/exec-all.h"
#include "tcg/tcg.h"
-#if defined(CONFIG_USER_ONLY)
-#include "qemu.h"
-#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
-#include <sys/param.h>
-#if __FreeBSD_version >= 700104
-#define HAVE_KINFO_GETVMMAP
-#define sigqueue sigqueue_freebsd /* avoid redefinition */
-#include <sys/proc.h>
-#include <machine/profile.h>
-#define _KERNEL
-#include <sys/user.h>
-#undef _KERNEL
-#undef sigqueue
-#include <libutil.h>
-#endif
-#endif
-#else
-#include "exec/ram_addr.h"
-#endif
-
-#include "exec/cputlb.h"
-#include "exec/page-protection.h"
+#include "exec/mmap-lock.h"
#include "tb-internal.h"
-#include "exec/translator.h"
#include "exec/tb-flush.h"
-#include "qemu/bitmap.h"
-#include "qemu/qemu-print.h"
-#include "qemu/main-loop.h"
#include "qemu/cacheinfo.h"
-#include "qemu/timer.h"
+#include "qemu/target-info.h"
#include "exec/log.h"
-#include "system/cpu-timers.h"
-#include "system/tcg.h"
-#include "qapi/error.h"
+#include "exec/icount.h"
#include "accel/tcg/cpu-ops.h"
#include "tb-jmp-cache.h"
#include "tb-hash.h"
#include "tb-context.h"
#include "tb-internal.h"
#include "internal-common.h"
-#include "internal-target.h"
#include "tcg/perf.h"
#include "tcg/insn-start-words.h"
@@ -106,7 +77,7 @@ static int64_t decode_sleb128(const uint8_t **pp)
val |= (int64_t)(byte & 0x7f) << shift;
shift += 7;
} while (byte & 0x80);
- if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
+ if (shift < 64 && (byte & 0x40)) {
val |= -(int64_t)1 << shift;
}
@@ -117,7 +88,7 @@ static int64_t decode_sleb128(const uint8_t **pp)
/* Encode the data collected about the instructions while compiling TB.
Place the data at BLOCK, and return the number of bytes consumed.
- The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
+ The logical table consists of INSN_START_WORDS uint64_t's,
which come from the target's insn_start data, followed by a uintptr_t
which comes from the host pc of the end of the code implementing the insn.
@@ -137,13 +108,13 @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
for (i = 0, n = tb->icount; i < n; ++i) {
uint64_t prev, curr;
- for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
+ for (j = 0; j < INSN_START_WORDS; ++j) {
if (i == 0) {
prev = (!(tb_cflags(tb) & CF_PCREL) && j == 0 ? tb->pc : 0);
} else {
- prev = insn_data[(i - 1) * TARGET_INSN_START_WORDS + j];
+ prev = insn_data[(i - 1) * INSN_START_WORDS + j];
}
- curr = insn_data[i * TARGET_INSN_START_WORDS + j];
+ curr = insn_data[i * INSN_START_WORDS + j];
p = encode_sleb128(p, curr - prev);
}
prev = (i == 0 ? 0 : insn_end_off[i - 1]);
@@ -175,7 +146,7 @@ static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
return -1;
}
- memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS);
+ memset(data, 0, sizeof(uint64_t) * INSN_START_WORDS);
if (!(tb_cflags(tb) & CF_PCREL)) {
data[0] = tb->pc;
}
@@ -185,7 +156,7 @@ static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
* at which the end of the insn exceeds host_pc.
*/
for (i = 0; i < num_insns; ++i) {
- for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
+ for (j = 0; j < INSN_START_WORDS; ++j) {
data[j] += decode_sleb128(&p);
}
iter_pc += decode_sleb128(&p);
@@ -203,7 +174,7 @@ static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
uintptr_t host_pc)
{
- uint64_t data[TARGET_INSN_START_WORDS];
+ uint64_t data[INSN_START_WORDS];
int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);
if (insns_left < 0) {
@@ -287,9 +258,7 @@ static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
}
/* Called with mmap_lock held for user mode emulation. */
-TranslationBlock *tb_gen_code(CPUState *cpu,
- vaddr pc, uint64_t cs_base,
- uint32_t flags, int cflags)
+TranslationBlock *tb_gen_code(CPUState *cpu, TCGTBCPUState s)
{
CPUArchState *env = cpu_env(cpu);
TranslationBlock *tb, *existing_tb;
@@ -302,14 +271,14 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
assert_memory_lock();
qemu_thread_jit_write();
- phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);
+ phys_pc = get_page_addr_code_hostp(env, s.pc, &host_pc);
if (phys_pc == -1) {
/* Generate a one-shot TB with 1 insn in it */
- cflags = (cflags & ~CF_COUNT_MASK) | 1;
+ s.cflags = (s.cflags & ~CF_COUNT_MASK) | 1;
}
- max_insns = cflags & CF_COUNT_MASK;
+ max_insns = s.cflags & CF_COUNT_MASK;
if (max_insns == 0) {
max_insns = TCG_MAX_INSNS;
}
@@ -320,7 +289,11 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
tb = tcg_tb_alloc(tcg_ctx);
if (unlikely(!tb)) {
/* flush must be done */
- tb_flush(cpu);
+ if (cpu_in_serial_context(cpu)) {
+ tb_flush__exclusive_or_serial();
+ goto buffer_overflow;
+ }
+ queue_tb_flush(cpu);
mmap_unlock();
/* Make the execution loop process the flush as soon as possible. */
cpu->exception_index = EXCP_INTERRUPT;
@@ -329,12 +302,12 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
gen_code_buf = tcg_ctx->code_gen_ptr;
tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
- if (!(cflags & CF_PCREL)) {
- tb->pc = pc;
+ if (!(s.cflags & CF_PCREL)) {
+ tb->pc = s.pc;
}
- tb->cs_base = cs_base;
- tb->flags = flags;
- tb->cflags = cflags;
+ tb->cs_base = s.cs_base;
+ tb->flags = s.flags;
+ tb->cflags = s.cflags;
tb_set_page_addr0(tb, phys_pc);
tb_set_page_addr1(tb, -1);
if (phys_pc != -1) {
@@ -342,23 +315,13 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
}
tcg_ctx->gen_tb = tb;
- tcg_ctx->addr_type = TARGET_LONG_BITS == 32 ? TCG_TYPE_I32 : TCG_TYPE_I64;
-#ifdef CONFIG_SOFTMMU
- tcg_ctx->page_bits = TARGET_PAGE_BITS;
- tcg_ctx->page_mask = TARGET_PAGE_MASK;
- tcg_ctx->tlb_dyn_max_bits = CPU_TLB_DYN_MAX_BITS;
-#endif
- tcg_ctx->insn_start_words = TARGET_INSN_START_WORDS;
-#ifdef TCG_GUEST_DEFAULT_MO
- tcg_ctx->guest_mo = TCG_GUEST_DEFAULT_MO;
-#else
- tcg_ctx->guest_mo = TCG_MO_ALL;
-#endif
+ tcg_ctx->addr_type = target_long_bits() == 32 ? TCG_TYPE_I32 : TCG_TYPE_I64;
+ tcg_ctx->guest_mo = cpu->cc->tcg_ops->guest_default_memory_order;
restart_translate:
- trace_translate_block(tb, pc, tb->tc.ptr);
+ trace_translate_block(tb, s.pc, tb->tc.ptr);
- gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
+ gen_code_size = setjmp_gen_code(env, tb, s.pc, host_pc, &max_insns, &ti);
if (unlikely(gen_code_size < 0)) {
switch (gen_code_size) {
case -1:
@@ -435,10 +398,10 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
* For CF_PCREL, attribute all executions of the generated code
* to its first mapping.
*/
- perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf));
+ perf_report_code(s.pc, tb, tcg_splitwx_to_rx(gen_code_buf));
if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
- qemu_log_in_addr_range(pc)) {
+ qemu_log_in_addr_range(s.pc)) {
FILE *logfile = qemu_log_trylock();
if (logfile) {
int code_size, data_size;
@@ -460,7 +423,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
fprintf(logfile,
" -- guest addr 0x%016" PRIx64 " + tb prologue\n",
- tcg_ctx->gen_insn_data[insn * TARGET_INSN_START_WORDS]);
+ tcg_ctx->gen_insn_data[insn * INSN_START_WORDS]);
chunk_start = tcg_ctx->gen_insn_end_off[insn];
disas(logfile, tb->tc.ptr, chunk_start);
@@ -473,7 +436,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
if (chunk_end > chunk_start) {
fprintf(logfile, " -- guest addr 0x%016" PRIx64 "\n",
- tcg_ctx->gen_insn_data[insn * TARGET_INSN_START_WORDS]);
+ tcg_ctx->gen_insn_data[insn * INSN_START_WORDS]);
disas(logfile, tb->tc.ptr + chunk_start,
chunk_end - chunk_start);
chunk_start = chunk_end;
@@ -591,15 +554,11 @@ void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
/* The exception probably happened in a helper. The CPU state should
have been saved before calling it. Fetch the PC from there. */
CPUArchState *env = cpu_env(cpu);
- vaddr pc;
- uint64_t cs_base;
- tb_page_addr_t addr;
- uint32_t flags;
+ TCGTBCPUState s = cpu->cc->tcg_ops->get_tb_cpu_state(cpu);
+ tb_page_addr_t addr = get_page_addr_code(env, s.pc);
- cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
- addr = get_page_addr_code(env, pc);
if (addr != -1) {
- tb_invalidate_phys_range(addr, addr);
+ tb_invalidate_phys_range(cpu, addr, addr);
}
}
}
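
[The tb_gen_code() hunks above fold the former pc/cs_base/flags/cflags parameters into a single TCGTBCPUState value, so adding a field no longer touches every prototype in the call chain. A sketch of the shape this implies, with field names taken from the usage in this file; QEMU's real definition may carry more:]

#include <stdint.h>

typedef uint64_t vaddr;

/* One value object carries everything that identifies a
 * translation-block key (layout assumed from the call sites above). */
typedef struct {
    vaddr pc;
    uint64_t cs_base;
    uint32_t flags;
    uint32_t cflags;
} TCGTBCPUState;

static int tb_key_matches(const TCGTBCPUState *a, const TCGTBCPUState *b)
{
    return a->pc == b->pc && a->cs_base == b->cs_base &&
           a->flags == b->flags && a->cflags == b->cflags;
}

int main(void)
{
    TCGTBCPUState a = { .pc = 0x1000, .cflags = 1 };
    TCGTBCPUState b = a;
    return tb_key_matches(&a, &b) ? 0 : 1;
}
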
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index ef1538b..034f2f3 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -8,16 +8,16 @@
*/
#include "qemu/osdep.h"
+#include "qemu/bswap.h"
#include "qemu/log.h"
#include "qemu/error-report.h"
-#include "exec/exec-all.h"
+#include "accel/tcg/cpu-ldst-common.h"
+#include "accel/tcg/cpu-mmu-index.h"
+#include "exec/target_page.h"
#include "exec/translator.h"
-#include "exec/cpu_ldst.h"
#include "exec/plugin-gen.h"
-#include "exec/cpu_ldst.h"
-#include "exec/tswap.h"
#include "tcg/tcg-op-common.h"
-#include "internal-target.h"
+#include "internal-common.h"
#include "disas/disas.h"
#include "tb-internal.h"
@@ -25,8 +25,7 @@ static void set_can_do_io(DisasContextBase *db, bool val)
{
QEMU_BUILD_BUG_ON(sizeof_field(CPUState, neg.can_do_io) != 1);
tcg_gen_st8_i32(tcg_constant_i32(val), tcg_env,
- offsetof(ArchCPU, parent_obj.neg.can_do_io) -
- offsetof(ArchCPU, env));
+ offsetof(CPUState, neg.can_do_io) - sizeof(CPUState));
}
bool translator_io_start(DisasContextBase *db)
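
[The offsetof() rewrites in this file all rest on one layout guarantee: the per-target env member sits immediately after the embedded CPUState inside ArchCPU, so any CPUState field is reachable from tcg_env at offsetof(CPUState, field) - sizeof(CPUState). A self-contained sketch of that arithmetic, with stand-in struct names:]

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Stand-ins: the generic state must end exactly where env begins. */
typedef struct {
    uint8_t can_do_io;
    uint32_t icount;
} CPUStateStub;

typedef struct {
    CPUStateStub parent;   /* embedded generic CPU state */
    long env;              /* what tcg_env points at */
} ArchCPUStub;

int main(void)
{
    ArchCPUStub cpu = { .parent.can_do_io = 1 };
    char *envp = (char *)&cpu.env;
    ptrdiff_t off = offsetof(CPUStateStub, can_do_io)
                  - (ptrdiff_t)sizeof(CPUStateStub);

    /* The trick only works if no padding separates the two members. */
    assert(offsetof(ArchCPUStub, env) == sizeof(CPUStateStub));
    assert(*(uint8_t *)(envp + off) == 1);
    return 0;
}
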
@@ -49,8 +48,8 @@ static TCGOp *gen_tb_start(DisasContextBase *db, uint32_t cflags)
if ((cflags & CF_USE_ICOUNT) || !(cflags & CF_NOIRQ)) {
count = tcg_temp_new_i32();
tcg_gen_ld_i32(count, tcg_env,
- offsetof(ArchCPU, parent_obj.neg.icount_decr.u32)
- - offsetof(ArchCPU, env));
+ offsetof(CPUState, neg.icount_decr.u32) -
+ sizeof(CPUState));
}
if (cflags & CF_USE_ICOUNT) {
@@ -79,8 +78,8 @@ static TCGOp *gen_tb_start(DisasContextBase *db, uint32_t cflags)
if (cflags & CF_USE_ICOUNT) {
tcg_gen_st16_i32(count, tcg_env,
- offsetof(ArchCPU, parent_obj.neg.icount_decr.u16.low)
- - offsetof(ArchCPU, env));
+ offsetof(CPUState, neg.icount_decr.u16.low) -
+ sizeof(CPUState));
}
return icount_start_insn;
@@ -142,6 +141,7 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
db->host_addr[1] = NULL;
db->record_start = 0;
db->record_len = 0;
+ db->code_mmuidx = cpu_mmu_index(cpu, true);
ops->init_disas_context(db, cpu);
tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */
@@ -265,12 +265,14 @@ static bool translator_ld(CPUArchState *env, DisasContextBase *db,
if (likely(((base ^ last) & TARGET_PAGE_MASK) == 0)) {
/* Entire read is from the first page. */
- memcpy(dest, host + (pc - base), len);
- return true;
+ goto do_read;
}
if (unlikely(((base ^ pc) & TARGET_PAGE_MASK) == 0)) {
- /* Read begins on the first page and extends to the second. */
+ /*
+ * Read begins on the first page and extends to the second.
+ * The unaligned read is never atomic.
+ */
size_t len0 = -(pc | TARGET_PAGE_MASK);
memcpy(dest, host + (pc - base), len0);
pc += len0;
@@ -329,7 +331,39 @@ static bool translator_ld(CPUArchState *env, DisasContextBase *db,
host = db->host_addr[1];
}
- memcpy(dest, host + (pc - base), len);
+ do_read:
+ /*
+ * Assume aligned reads should be atomic, if possible.
+ * We're not in a position to jump out with EXCP_ATOMIC.
+ */
+ host += pc - base;
+ switch (len) {
+ case 2:
+ if (QEMU_IS_ALIGNED(pc, 2)) {
+ uint16_t t = qatomic_read((uint16_t *)host);
+ stw_he_p(dest, t);
+ return true;
+ }
+ break;
+ case 4:
+ if (QEMU_IS_ALIGNED(pc, 4)) {
+ uint32_t t = qatomic_read((uint32_t *)host);
+ stl_he_p(dest, t);
+ return true;
+ }
+ break;
+#ifdef CONFIG_ATOMIC64
+ case 8:
+ if (QEMU_IS_ALIGNED(pc, 8)) {
+ uint64_t t = qatomic_read__nocheck((uint64_t *)host);
+ stq_he_p(dest, t);
+ return true;
+ }
+ break;
+#endif
+ }
+ /* Unaligned or partial read from the second page is not atomic. */
+ memcpy(dest, host, len);
return true;
}
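
[The do_read path above prefers a single atomic fetch for naturally aligned 2/4/8-byte reads, so a concurrently rewritten instruction is never observed torn; everything else falls back to memcpy. A sketch of the same policy, assuming the GCC/Clang __atomic builtins (which is the mechanism qatomic_read wraps); the hunk above additionally guards the 8-byte case behind CONFIG_ATOMIC64:]

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

static bool read_code_bytes(void *dest, const void *host,
                            uintptr_t pc, size_t len)
{
    switch (len) {
    case 2:
        if (pc % 2 == 0) {
            uint16_t t = __atomic_load_n((const uint16_t *)host,
                                         __ATOMIC_RELAXED);
            memcpy(dest, &t, sizeof(t));
            return true;
        }
        break;
    case 4:
        if (pc % 4 == 0) {
            uint32_t t = __atomic_load_n((const uint32_t *)host,
                                         __ATOMIC_RELAXED);
            memcpy(dest, &t, sizeof(t));
            return true;
        }
        break;
    case 8:
        if (pc % 8 == 0) {
            uint64_t t = __atomic_load_n((const uint64_t *)host,
                                         __ATOMIC_RELAXED);
            memcpy(dest, &t, sizeof(t));
            return true;
        }
        break;
    }
    /* Unaligned or odd-sized: not atomic, matching the comment above. */
    memcpy(dest, host, len);
    return true;
}
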
@@ -423,55 +457,62 @@ bool translator_st(const DisasContextBase *db, void *dest,
uint8_t translator_ldub(CPUArchState *env, DisasContextBase *db, vaddr pc)
{
- uint8_t raw;
+ uint8_t val;
- if (!translator_ld(env, db, &raw, pc, sizeof(raw))) {
- raw = cpu_ldub_code(env, pc);
- record_save(db, pc, &raw, sizeof(raw));
+ if (!translator_ld(env, db, &val, pc, sizeof(val))) {
+ MemOpIdx oi = make_memop_idx(MO_UB, db->code_mmuidx);
+ val = cpu_ldb_code_mmu(env, pc, oi, 0);
+ record_save(db, pc, &val, sizeof(val));
}
- return raw;
+ return val;
}
-uint16_t translator_lduw(CPUArchState *env, DisasContextBase *db, vaddr pc)
+uint16_t translator_lduw_end(CPUArchState *env, DisasContextBase *db,
+ vaddr pc, MemOp endian)
{
- uint16_t raw, tgt;
+ uint16_t val;
- if (translator_ld(env, db, &raw, pc, sizeof(raw))) {
- tgt = tswap16(raw);
- } else {
- tgt = cpu_lduw_code(env, pc);
- raw = tswap16(tgt);
- record_save(db, pc, &raw, sizeof(raw));
+ if (!translator_ld(env, db, &val, pc, sizeof(val))) {
+ MemOpIdx oi = make_memop_idx(MO_UW, db->code_mmuidx);
+ val = cpu_ldw_code_mmu(env, pc, oi, 0);
+ record_save(db, pc, &val, sizeof(val));
+ }
+ if (endian & MO_BSWAP) {
+ val = bswap16(val);
}
- return tgt;
+ return val;
}
-uint32_t translator_ldl(CPUArchState *env, DisasContextBase *db, vaddr pc)
+uint32_t translator_ldl_end(CPUArchState *env, DisasContextBase *db,
+ vaddr pc, MemOp endian)
{
- uint32_t raw, tgt;
+ uint32_t val;
- if (translator_ld(env, db, &raw, pc, sizeof(raw))) {
- tgt = tswap32(raw);
- } else {
- tgt = cpu_ldl_code(env, pc);
- raw = tswap32(tgt);
- record_save(db, pc, &raw, sizeof(raw));
+ if (!translator_ld(env, db, &val, pc, sizeof(val))) {
+ MemOpIdx oi = make_memop_idx(MO_UL, db->code_mmuidx);
+ val = cpu_ldl_code_mmu(env, pc, oi, 0);
+ record_save(db, pc, &val, sizeof(val));
+ }
+ if (endian & MO_BSWAP) {
+ val = bswap32(val);
}
- return tgt;
+ return val;
}
-uint64_t translator_ldq(CPUArchState *env, DisasContextBase *db, vaddr pc)
+uint64_t translator_ldq_end(CPUArchState *env, DisasContextBase *db,
+ vaddr pc, MemOp endian)
{
- uint64_t raw, tgt;
+ uint64_t val;
- if (translator_ld(env, db, &raw, pc, sizeof(raw))) {
- tgt = tswap64(raw);
- } else {
- tgt = cpu_ldq_code(env, pc);
- raw = tswap64(tgt);
- record_save(db, pc, &raw, sizeof(raw));
+ if (!translator_ld(env, db, &val, pc, sizeof(val))) {
+ MemOpIdx oi = make_memop_idx(MO_UQ, db->code_mmuidx);
+ val = cpu_ldq_code_mmu(env, pc, oi, 0);
+ record_save(db, pc, &val, sizeof(val));
+ }
+ if (endian & MO_BSWAP) {
+ val = bswap64(val);
}
- return tgt;
+ return val;
}
void translator_fake_ld(DisasContextBase *db, const void *data, size_t len)
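
[The renamed *_end loaders separate the fetch from the endianness fixup: the bytes are obtained once in host order, then byte-swapped only when the caller's MemOp asks for the opposite endianness. Modelled minimally below; MO_BSWAP is a stand-in bit here, not QEMU's MemOp encoding, and __builtin_bswap32 assumes GCC or Clang:]

#include <stdint.h>
#include <string.h>

enum { MO_BSWAP = 1 };   /* illustrative flag, not QEMU's value */

static uint32_t ld32_end(const void *host, int endian)
{
    uint32_t val;

    memcpy(&val, host, sizeof(val));   /* host-order fetch */
    if (endian & MO_BSWAP) {
        val = __builtin_bswap32(val);  /* cross-endian guest */
    }
    return val;
}
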
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index 2322181..1800dff 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -20,34 +20,42 @@
#include "accel/tcg/cpu-ops.h"
#include "disas/disas.h"
#include "exec/vaddr.h"
-#include "exec/exec-all.h"
+#include "exec/tlb-flags.h"
#include "tcg/tcg.h"
#include "qemu/bitops.h"
#include "qemu/rcu.h"
-#include "exec/cpu_ldst.h"
+#include "accel/tcg/cpu-ldst-common.h"
+#include "accel/tcg/helper-retaddr.h"
+#include "accel/tcg/probe.h"
#include "user/cpu_loop.h"
+#include "user/guest-host.h"
#include "qemu/main-loop.h"
#include "user/page-protection.h"
#include "exec/page-protection.h"
-#include "exec/helper-proto.h"
+#include "exec/helper-proto-common.h"
#include "qemu/atomic128.h"
#include "qemu/bswap.h"
#include "qemu/int128.h"
#include "trace.h"
#include "tcg/tcg-ldst.h"
+#include "tcg-accel-ops.h"
+#include "backend-ldst.h"
#include "internal-common.h"
-#include "internal-target.h"
#include "tb-internal.h"
__thread uintptr_t helper_retaddr;
//#define DEBUG_SIGNAL
-void cpu_interrupt(CPUState *cpu, int mask)
+void qemu_cpu_kick(CPUState *cpu)
{
- g_assert(bql_locked());
- cpu->interrupt_request |= mask;
- qatomic_set(&cpu->neg.icount_decr.u16.high, -1);
+ tcg_kick_vcpu_thread(cpu);
+}
+
+void qemu_process_cpu_events(CPUState *cpu)
+{
+ qatomic_set(&cpu->exit_request, false);
+ process_queued_cpu_work(cpu);
}
/*
@@ -123,9 +131,9 @@ MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write)
* guest, we'd end up in an infinite loop of retrying the faulting access.
*/
bool handle_sigsegv_accerr_write(CPUState *cpu, sigset_t *old_set,
- uintptr_t host_pc, abi_ptr guest_addr)
+ uintptr_t host_pc, vaddr guest_addr)
{
- switch (page_unprotect(guest_addr, host_pc)) {
+ switch (page_unprotect(cpu, guest_addr, host_pc)) {
case 0:
/*
* Fault not caused by a page marked unwritable to protect
@@ -159,7 +167,7 @@ typedef struct PageFlagsNode {
static IntervalTreeRoot pageflags_root;
-static PageFlagsNode *pageflags_find(target_ulong start, target_ulong last)
+static PageFlagsNode *pageflags_find(vaddr start, vaddr last)
{
IntervalTreeNode *n;
@@ -167,8 +175,7 @@ static PageFlagsNode *pageflags_find(target_ulong start, target_ulong last)
return n ? container_of(n, PageFlagsNode, itree) : NULL;
}
-static PageFlagsNode *pageflags_next(PageFlagsNode *p, target_ulong start,
- target_ulong last)
+static PageFlagsNode *pageflags_next(PageFlagsNode *p, vaddr start, vaddr last)
{
IntervalTreeNode *n;
@@ -197,13 +204,22 @@ int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
return rc;
}
-static int dump_region(void *priv, target_ulong start,
- target_ulong end, unsigned long prot)
+static int dump_region(void *opaque, vaddr start, vaddr end, int prot)
{
- FILE *f = (FILE *)priv;
+ FILE *f = opaque;
+ uint64_t mask;
+ int width;
+
+ if (guest_addr_max <= UINT32_MAX) {
+ mask = UINT32_MAX, width = 8;
+ } else {
+ mask = UINT64_MAX, width = 16;
+ }
- fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx" "TARGET_FMT_lx" %c%c%c\n",
- start, end, end - start,
+ fprintf(f, "%0*" PRIx64 "-%0*" PRIx64 " %0*" PRIx64 " %c%c%c\n",
+ width, start & mask,
+ width, end & mask,
+ width, (end - start) & mask,
((prot & PAGE_READ) ? 'r' : '-'),
((prot & PAGE_WRITE) ? 'w' : '-'),
((prot & PAGE_EXEC) ? 'x' : '-'));
@@ -213,14 +229,14 @@ static int dump_region(void *priv, target_ulong start,
/* dump memory mappings */
void page_dump(FILE *f)
{
- const int length = sizeof(target_ulong) * 2;
+ int width = guest_addr_max <= UINT32_MAX ? 8 : 16;
fprintf(f, "%-*s %-*s %-*s %s\n",
- length, "start", length, "end", length, "size", "prot");
+ width, "start", width, "end", width, "size", "prot");
walk_memory_regions(f, dump_region);
}
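
[dump_region() and page_dump() above now derive the printed address width from guest_addr_max at run time instead of baking a compile-time target_ulong format into the string. The same technique in isolation:]

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Pick 8 or 16 hex digits once, from the maximum guest address. */
static void print_region(FILE *f, uint64_t guest_addr_max,
                         uint64_t start, uint64_t end)
{
    int width = guest_addr_max <= UINT32_MAX ? 8 : 16;

    fprintf(f, "%0*" PRIx64 "-%0*" PRIx64 " %0*" PRIx64 "\n",
            width, start, width, end, width, end - start);
}

int main(void)
{
    print_region(stdout, UINT32_MAX, 0x400000, 0x401000);
    print_region(stdout, UINT64_MAX, 0x400000, 0x401000);
    return 0;
}
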
-int page_get_flags(target_ulong address)
+int page_get_flags(vaddr address)
{
PageFlagsNode *p = pageflags_find(address, address);
@@ -243,7 +259,7 @@ int page_get_flags(target_ulong address)
}
/* A subroutine of page_set_flags: insert a new node for [start,last]. */
-static void pageflags_create(target_ulong start, target_ulong last, int flags)
+static void pageflags_create(vaddr start, vaddr last, int flags)
{
PageFlagsNode *p = g_new(PageFlagsNode, 1);
@@ -253,54 +269,11 @@ static void pageflags_create(target_ulong start, target_ulong last, int flags)
interval_tree_insert(&p->itree, &pageflags_root);
}
-/* A subroutine of page_set_flags: remove everything in [start,last]. */
-static bool pageflags_unset(target_ulong start, target_ulong last)
-{
- bool inval_tb = false;
-
- while (true) {
- PageFlagsNode *p = pageflags_find(start, last);
- target_ulong p_last;
-
- if (!p) {
- break;
- }
-
- if (p->flags & PAGE_EXEC) {
- inval_tb = true;
- }
-
- interval_tree_remove(&p->itree, &pageflags_root);
- p_last = p->itree.last;
-
- if (p->itree.start < start) {
- /* Truncate the node from the end, or split out the middle. */
- p->itree.last = start - 1;
- interval_tree_insert(&p->itree, &pageflags_root);
- if (last < p_last) {
- pageflags_create(last + 1, p_last, p->flags);
- break;
- }
- } else if (p_last <= last) {
- /* Range completely covers node -- remove it. */
- g_free_rcu(p, rcu);
- } else {
- /* Truncate the node from the start. */
- p->itree.start = last + 1;
- interval_tree_insert(&p->itree, &pageflags_root);
- break;
- }
- }
-
- return inval_tb;
-}
-
/*
* A subroutine of page_set_flags: nothing overlaps [start,last],
* but check adjacent mappings and maybe merge into a single range.
*/
-static void pageflags_create_merge(target_ulong start, target_ulong last,
- int flags)
+static void pageflags_create_merge(vaddr start, vaddr last, int flags)
{
PageFlagsNode *next = NULL, *prev = NULL;
@@ -341,28 +314,19 @@ static void pageflags_create_merge(target_ulong start, target_ulong last,
}
}
-/*
- * Allow the target to decide if PAGE_TARGET_[12] may be reset.
- * By default, they are not kept.
- */
-#ifndef PAGE_TARGET_STICKY
-#define PAGE_TARGET_STICKY 0
-#endif
-#define PAGE_STICKY (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY)
-
/* A subroutine of page_set_flags: add flags to [start,last]. */
-static bool pageflags_set_clear(target_ulong start, target_ulong last,
+static bool pageflags_set_clear(vaddr start, vaddr last,
int set_flags, int clear_flags)
{
PageFlagsNode *p;
- target_ulong p_start, p_last;
+ vaddr p_start, p_last;
int p_flags, merge_flags;
bool inval_tb = false;
restart:
p = pageflags_find(start, last);
if (!p) {
- if (set_flags) {
+ if (set_flags & PAGE_VALID) {
pageflags_create_merge(start, last, set_flags);
}
goto done;
@@ -376,11 +340,12 @@ static bool pageflags_set_clear(target_ulong start, target_ulong last,
/*
* Need to flush if an overlapping executable region
- * removes exec, or adds write.
+ * removes exec, adds write, or is a new mapping.
*/
if ((p_flags & PAGE_EXEC)
&& (!(merge_flags & PAGE_EXEC)
- || (merge_flags & ~p_flags & PAGE_WRITE))) {
+ || (merge_flags & ~p_flags & PAGE_WRITE)
+ || (clear_flags & PAGE_VALID))) {
inval_tb = true;
}
@@ -389,7 +354,7 @@ static bool pageflags_set_clear(target_ulong start, target_ulong last,
* attempting to merge with adjacent regions.
*/
if (start == p_start && last == p_last) {
- if (merge_flags) {
+ if (merge_flags & PAGE_VALID) {
p->flags = merge_flags;
} else {
interval_tree_remove(&p->itree, &pageflags_root);
@@ -409,12 +374,12 @@ static bool pageflags_set_clear(target_ulong start, target_ulong last,
interval_tree_insert(&p->itree, &pageflags_root);
if (last < p_last) {
- if (merge_flags) {
+ if (merge_flags & PAGE_VALID) {
pageflags_create(start, last, merge_flags);
}
pageflags_create(last + 1, p_last, p_flags);
} else {
- if (merge_flags) {
+ if (merge_flags & PAGE_VALID) {
pageflags_create(start, p_last, merge_flags);
}
if (p_last < last) {
@@ -423,18 +388,18 @@ static bool pageflags_set_clear(target_ulong start, target_ulong last,
}
}
} else {
- if (start < p_start && set_flags) {
+ if (start < p_start && (set_flags & PAGE_VALID)) {
pageflags_create(start, p_start - 1, set_flags);
}
if (last < p_last) {
interval_tree_remove(&p->itree, &pageflags_root);
p->itree.start = last + 1;
interval_tree_insert(&p->itree, &pageflags_root);
- if (merge_flags) {
+ if (merge_flags & PAGE_VALID) {
pageflags_create(start, last, merge_flags);
}
} else {
- if (merge_flags) {
+ if (merge_flags & PAGE_VALID) {
p->flags = merge_flags;
} else {
interval_tree_remove(&p->itree, &pageflags_root);
@@ -482,7 +447,7 @@ static bool pageflags_set_clear(target_ulong start, target_ulong last,
g_free_rcu(p, rcu);
goto restart;
}
- if (set_flags) {
+ if (set_flags & PAGE_VALID) {
pageflags_create(start, last, set_flags);
}
@@ -490,49 +455,43 @@ static bool pageflags_set_clear(target_ulong start, target_ulong last,
return inval_tb;
}
-void page_set_flags(target_ulong start, target_ulong last, int flags)
+void page_set_flags(vaddr start, vaddr last, int set_flags, int clear_flags)
{
- bool reset = false;
- bool inval_tb = false;
-
- /* This function should never be called with addresses outside the
- guest address space. If this assert fires, it probably indicates
- a missing call to h2g_valid. */
+ /*
+ * This function should never be called with addresses outside the
+ * guest address space. If this assert fires, it probably indicates
+ * a missing call to h2g_valid.
+ */
assert(start <= last);
- assert(last <= GUEST_ADDR_MAX);
- /* Only set PAGE_ANON with new mappings. */
- assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
+ assert(last <= guest_addr_max);
assert_memory_lock();
start &= TARGET_PAGE_MASK;
last |= ~TARGET_PAGE_MASK;
- if (!(flags & PAGE_VALID)) {
- flags = 0;
- } else {
- reset = flags & PAGE_RESET;
- flags &= ~PAGE_RESET;
- if (flags & PAGE_WRITE) {
- flags |= PAGE_WRITE_ORG;
- }
+ if (set_flags & PAGE_WRITE) {
+ set_flags |= PAGE_WRITE_ORG;
+ }
+ if (clear_flags & PAGE_WRITE) {
+ clear_flags |= PAGE_WRITE_ORG;
}
- if (!flags || reset) {
+ if (clear_flags & PAGE_VALID) {
page_reset_target_data(start, last);
- inval_tb |= pageflags_unset(start, last);
- }
- if (flags) {
- inval_tb |= pageflags_set_clear(start, last, flags,
- ~(reset ? 0 : PAGE_STICKY));
+ clear_flags = -1;
+ } else {
+ /* Only set PAGE_ANON with new mappings. */
+ assert(!(set_flags & PAGE_ANON));
}
- if (inval_tb) {
- tb_invalidate_phys_range(start, last);
+
+ if (pageflags_set_clear(start, last, set_flags, clear_flags)) {
+ tb_invalidate_phys_range(NULL, start, last);
}
}
-bool page_check_range(target_ulong start, target_ulong len, int flags)
+bool page_check_range(vaddr start, vaddr len, int flags)
{
- target_ulong last;
+ vaddr last;
int locked; /* tri-state: =0: unlocked, +1: global, -1: local */
bool ret;
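
[page_set_flags() above now takes explicit set/clear masks: any PAGE_WRITE change is mirrored into PAGE_WRITE_ORG, and PAGE_VALID in clear_flags widens to "drop the whole mapping" (clear_flags = -1). That rule, reduced to plain integers with hypothetical flag values and ignoring the interval tree:]

#include <assert.h>

enum {
    PAGE_VALID     = 1 << 0,
    PAGE_WRITE     = 1 << 1,
    PAGE_WRITE_ORG = 1 << 2,   /* remembers the original W permission */
};

static int page_update(int old, int set_flags, int clear_flags)
{
    if (set_flags & PAGE_WRITE) {
        set_flags |= PAGE_WRITE_ORG;
    }
    if (clear_flags & PAGE_WRITE) {
        clear_flags |= PAGE_WRITE_ORG;
    }
    if (clear_flags & PAGE_VALID) {
        clear_flags = -1;              /* drop the whole mapping */
    }
    return (old & ~clear_flags) | set_flags;
}

int main(void)
{
    assert(page_update(PAGE_VALID, PAGE_WRITE, 0) ==
           (PAGE_VALID | PAGE_WRITE | PAGE_WRITE_ORG));
    assert(page_update(PAGE_VALID | PAGE_WRITE, 0, PAGE_VALID) == 0);
    return 0;
}
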
@@ -581,7 +540,7 @@ bool page_check_range(target_ulong start, target_ulong len, int flags)
break;
}
/* Asking about writable, but has been protected: undo. */
- if (!page_unprotect(start, 0)) {
+ if (!page_unprotect(NULL, start, 0)) {
ret = false;
break;
}
@@ -608,20 +567,19 @@ bool page_check_range(target_ulong start, target_ulong len, int flags)
return ret;
}
-bool page_check_range_empty(target_ulong start, target_ulong last)
+bool page_check_range_empty(vaddr start, vaddr last)
{
assert(last >= start);
assert_memory_lock();
return pageflags_find(start, last) == NULL;
}
-target_ulong page_find_range_empty(target_ulong min, target_ulong max,
- target_ulong len, target_ulong align)
+vaddr page_find_range_empty(vaddr min, vaddr max, vaddr len, vaddr align)
{
- target_ulong len_m1, align_m1;
+ vaddr len_m1, align_m1;
assert(min <= max);
- assert(max <= GUEST_ADDR_MAX);
+ assert(max <= guest_addr_max);
assert(len != 0);
assert(is_power_of_2(align));
assert_memory_lock();
@@ -656,10 +614,10 @@ target_ulong page_find_range_empty(target_ulong min, target_ulong max,
}
}
-void page_protect(tb_page_addr_t address)
+void tb_lock_page0(tb_page_addr_t address)
{
PageFlagsNode *p;
- target_ulong start, last;
+ vaddr start, last;
int host_page_size = qemu_real_host_page_size();
int prot;
@@ -701,11 +659,13 @@ void page_protect(tb_page_addr_t address)
* immediately exited. (We can only return 2 if the 'pc' argument is
* non-zero.)
*/
-int page_unprotect(tb_page_addr_t address, uintptr_t pc)
+int page_unprotect(CPUState *cpu, tb_page_addr_t address, uintptr_t pc)
{
PageFlagsNode *p;
bool current_tb_invalidated;
+ assert((cpu == NULL) == (pc == 0));
+
/*
* Technically this isn't safe inside a signal handler. However we
* know this only ever happens in a synchronous SEGV handler, so in
@@ -728,15 +688,15 @@ int page_unprotect(tb_page_addr_t address, uintptr_t pc)
* this thread raced with another one which got here first and
* set the page to PAGE_WRITE and did the TB invalidate for us.
*/
-#ifdef TARGET_HAS_PRECISE_SMC
- TranslationBlock *current_tb = tcg_tb_lookup(pc);
- if (current_tb) {
- current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
+ if (pc && cpu->cc->tcg_ops->precise_smc) {
+ TranslationBlock *current_tb = tcg_tb_lookup(pc);
+ if (current_tb) {
+ current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
+ }
}
-#endif
} else {
int host_page_size = qemu_real_host_page_size();
- target_ulong start, len, i;
+ vaddr start, len, i;
int prot;
if (host_page_size <= TARGET_PAGE_SIZE) {
@@ -744,14 +704,15 @@ int page_unprotect(tb_page_addr_t address, uintptr_t pc)
len = TARGET_PAGE_SIZE;
prot = p->flags | PAGE_WRITE;
pageflags_set_clear(start, start + len - 1, PAGE_WRITE, 0);
- current_tb_invalidated = tb_invalidate_phys_page_unwind(start, pc);
+ current_tb_invalidated =
+ tb_invalidate_phys_page_unwind(cpu, start, pc);
} else {
start = address & -host_page_size;
len = host_page_size;
prot = 0;
for (i = 0; i < len; i += TARGET_PAGE_SIZE) {
- target_ulong addr = start + i;
+ vaddr addr = start + i;
p = pageflags_find(addr, addr);
if (p) {
@@ -767,7 +728,7 @@ int page_unprotect(tb_page_addr_t address, uintptr_t pc)
* the corresponding translated code.
*/
current_tb_invalidated |=
- tb_invalidate_phys_page_unwind(addr, pc);
+ tb_invalidate_phys_page_unwind(cpu, addr, pc);
}
}
if (prot & PAGE_EXEC) {
@@ -847,6 +808,12 @@ void *probe_access(CPUArchState *env, vaddr addr, int size,
return size ? g2h(env_cpu(env), addr) : NULL;
}
+void *tlb_vaddr_to_host(CPUArchState *env, vaddr addr,
+ MMUAccessType access_type, int mmu_idx)
+{
+ return g2h(env_cpu(env), addr);
+}
+
tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr,
void **hostp)
{
@@ -861,7 +828,6 @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr,
return addr;
}
-#ifdef TARGET_PAGE_DATA_SIZE
/*
* Allocate chunks of target data together. For the only current user,
* if we allocate one hunk per page, we have overhead of 40/128 or 40%.
@@ -877,10 +843,16 @@ typedef struct TargetPageDataNode {
} TargetPageDataNode;
static IntervalTreeRoot targetdata_root;
+static size_t target_page_data_size;
-void page_reset_target_data(target_ulong start, target_ulong last)
+void page_reset_target_data(vaddr start, vaddr last)
{
IntervalTreeNode *n, *next;
+ size_t size = target_page_data_size;
+
+ if (likely(size == 0)) {
+ return;
+ }
assert_memory_lock();
@@ -892,7 +864,7 @@ void page_reset_target_data(target_ulong start, target_ulong last)
n != NULL;
n = next,
next = next ? interval_tree_iter_next(n, start, last) : NULL) {
- target_ulong n_start, n_last, p_ofs, p_len;
+ vaddr n_start, n_last, p_ofs, p_len;
TargetPageDataNode *t = container_of(n, TargetPageDataNode, itree);
if (n->start >= start && n->last <= last) {
@@ -911,16 +883,21 @@ void page_reset_target_data(target_ulong start, target_ulong last)
n_last = MIN(last, n->last);
p_len = (n_last + 1 - n_start) >> TARGET_PAGE_BITS;
- memset(t->data + p_ofs * TARGET_PAGE_DATA_SIZE, 0,
- p_len * TARGET_PAGE_DATA_SIZE);
+ memset(t->data + p_ofs * size, 0, p_len * size);
}
}
-void *page_get_target_data(target_ulong address)
+void *page_get_target_data(vaddr address, size_t size)
{
IntervalTreeNode *n;
TargetPageDataNode *t;
- target_ulong page, region, p_ofs;
+ vaddr page, region, p_ofs;
+
+ /* Remember the size from the first call, and it should be constant. */
+ if (unlikely(target_page_data_size != size)) {
+ assert(target_page_data_size == 0);
+ target_page_data_size = size;
+ }
page = address & TARGET_PAGE_MASK;
region = address & TBD_MASK;
@@ -936,8 +913,7 @@ void *page_get_target_data(target_ulong address)
mmap_lock();
n = interval_tree_iter_first(&targetdata_root, page, page);
if (!n) {
- t = g_malloc0(sizeof(TargetPageDataNode)
- + TPD_PAGES * TARGET_PAGE_DATA_SIZE);
+ t = g_malloc0(sizeof(TargetPageDataNode) + TPD_PAGES * size);
n = &t->itree;
n->start = region;
n->last = region | ~TBD_MASK;
@@ -948,11 +924,8 @@ void *page_get_target_data(target_ulong address)
t = container_of(n, TargetPageDataNode, itree);
p_ofs = (page - region) >> TARGET_PAGE_BITS;
- return t->data + p_ofs * TARGET_PAGE_DATA_SIZE;
+ return t->data + p_ofs * size;
}
-#else
-void page_reset_target_data(target_ulong start, target_ulong last) { }
-#endif /* TARGET_PAGE_DATA_SIZE */
/* The system-mode versions of these helpers are in cputlb.c. */
@@ -1014,7 +987,7 @@ int cpu_memory_rw_debug(CPUState *cpu, vaddr addr,
* be under mmap_lock() in order to prevent the creation of
* another TranslationBlock in between.
*/
- tb_invalidate_phys_range(addr, addr + l - 1);
+ tb_invalidate_phys_range(NULL, addr, addr + l - 1);
written = pwrite(fd, buf, l,
(off_t)(uintptr_t)g2h_untagged(addr));
if (written != l) {
@@ -1059,7 +1032,7 @@ static uint8_t do_ld1_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
void *haddr;
uint8_t ret;
- cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
+ cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, access_type);
ret = ldub_p(haddr);
clear_helper_retaddr();
@@ -1073,7 +1046,7 @@ static uint16_t do_ld2_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
uint16_t ret;
MemOp mop = get_memop(oi);
- cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
+ cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
ret = load_atom_2(cpu, ra, haddr, mop);
clear_helper_retaddr();
@@ -1091,7 +1064,7 @@ static uint32_t do_ld4_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
uint32_t ret;
MemOp mop = get_memop(oi);
- cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
+ cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
ret = load_atom_4(cpu, ra, haddr, mop);
clear_helper_retaddr();
@@ -1109,7 +1082,7 @@ static uint64_t do_ld8_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
uint64_t ret;
MemOp mop = get_memop(oi);
- cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
+ cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
ret = load_atom_8(cpu, ra, haddr, mop);
clear_helper_retaddr();
@@ -1120,7 +1093,7 @@ static uint64_t do_ld8_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
return ret;
}
-static Int128 do_ld16_mmu(CPUState *cpu, abi_ptr addr,
+static Int128 do_ld16_mmu(CPUState *cpu, vaddr addr,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
@@ -1128,7 +1101,7 @@ static Int128 do_ld16_mmu(CPUState *cpu, abi_ptr addr,
MemOp mop = get_memop(oi);
tcg_debug_assert((mop & MO_SIZE) == MO_128);
- cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
+ cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_LOAD);
ret = load_atom_16(cpu, ra, haddr, mop);
clear_helper_retaddr();
@@ -1144,7 +1117,7 @@ static void do_st1_mmu(CPUState *cpu, vaddr addr, uint8_t val,
{
void *haddr;
- cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
+ cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, MMU_DATA_STORE);
stb_p(haddr, val);
clear_helper_retaddr();
@@ -1156,7 +1129,7 @@ static void do_st2_mmu(CPUState *cpu, vaddr addr, uint16_t val,
void *haddr;
MemOp mop = get_memop(oi);
- cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
+ cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
if (mop & MO_BSWAP) {
@@ -1172,7 +1145,7 @@ static void do_st4_mmu(CPUState *cpu, vaddr addr, uint32_t val,
void *haddr;
MemOp mop = get_memop(oi);
- cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
+ cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
if (mop & MO_BSWAP) {
@@ -1188,7 +1161,7 @@ static void do_st8_mmu(CPUState *cpu, vaddr addr, uint64_t val,
void *haddr;
MemOp mop = get_memop(oi);
- cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
+ cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
if (mop & MO_BSWAP) {
@@ -1204,7 +1177,7 @@ static void do_st16_mmu(CPUState *cpu, vaddr addr, Int128 val,
void *haddr;
MemOpIdx mop = get_memop(oi);
- cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
+ cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
if (mop & MO_BSWAP) {
@@ -1214,101 +1187,28 @@ static void do_st16_mmu(CPUState *cpu, vaddr addr, Int128 val,
clear_helper_retaddr();
}
-uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr ptr)
-{
- uint32_t ret;
-
- set_helper_retaddr(1);
- ret = ldub_p(g2h_untagged(ptr));
- clear_helper_retaddr();
- return ret;
-}
-
-uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr ptr)
-{
- uint32_t ret;
-
- set_helper_retaddr(1);
- ret = lduw_p(g2h_untagged(ptr));
- clear_helper_retaddr();
- return ret;
-}
-
-uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr ptr)
-{
- uint32_t ret;
-
- set_helper_retaddr(1);
- ret = ldl_p(g2h_untagged(ptr));
- clear_helper_retaddr();
- return ret;
-}
-
-uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr ptr)
-{
- uint64_t ret;
-
- set_helper_retaddr(1);
- ret = ldq_p(g2h_untagged(ptr));
- clear_helper_retaddr();
- return ret;
-}
-
-uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
+uint8_t cpu_ldb_code_mmu(CPUArchState *env, vaddr addr,
MemOpIdx oi, uintptr_t ra)
{
- void *haddr;
- uint8_t ret;
-
- haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_INST_FETCH);
- ret = ldub_p(haddr);
- clear_helper_retaddr();
- return ret;
+ return do_ld1_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH);
}
-uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
+uint16_t cpu_ldw_code_mmu(CPUArchState *env, vaddr addr,
MemOpIdx oi, uintptr_t ra)
{
- void *haddr;
- uint16_t ret;
-
- haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_INST_FETCH);
- ret = lduw_p(haddr);
- clear_helper_retaddr();
- if (get_memop(oi) & MO_BSWAP) {
- ret = bswap16(ret);
- }
- return ret;
+ return do_ld2_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH);
}
-uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
+uint32_t cpu_ldl_code_mmu(CPUArchState *env, vaddr addr,
MemOpIdx oi, uintptr_t ra)
{
- void *haddr;
- uint32_t ret;
-
- haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_INST_FETCH);
- ret = ldl_p(haddr);
- clear_helper_retaddr();
- if (get_memop(oi) & MO_BSWAP) {
- ret = bswap32(ret);
- }
- return ret;
+ return do_ld4_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH);
}
-uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
+uint64_t cpu_ldq_code_mmu(CPUArchState *env, vaddr addr,
MemOpIdx oi, uintptr_t ra)
{
- void *haddr;
- uint64_t ret;
-
- haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_DATA_LOAD);
- ret = ldq_p(haddr);
- clear_helper_retaddr();
- if (get_memop(oi) & MO_BSWAP) {
- ret = bswap64(ret);
- }
- return ret;
+ return do_ld8_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH);
}
#include "ldst_common.c.inc"
diff --git a/accel/tcg/watchpoint.c b/accel/tcg/watchpoint.c
index 65b2188..cfb37a4 100644
--- a/accel/tcg/watchpoint.c
+++ b/accel/tcg/watchpoint.c
@@ -124,17 +124,14 @@ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
}
cpu->watchpoint_hit = wp;
- mmap_lock();
/* This call also restores vCPU state */
tb_check_watchpoint(cpu, ra);
if (wp->flags & BP_STOP_BEFORE_ACCESS) {
cpu->exception_index = EXCP_DEBUG;
- mmap_unlock();
cpu_loop_exit(cpu);
} else {
/* Force execution of one insn next time. */
cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu);
- mmap_unlock();
cpu_loop_exit_noexc(cpu);
}
} else {
diff --git a/accel/xen/xen-all.c b/accel/xen/xen-all.c
index 7aa28b9..97377d6 100644
--- a/accel/xen/xen-all.c
+++ b/accel/xen/xen-all.c
@@ -18,7 +18,9 @@
#include "hw/xen/xen_igd.h"
#include "chardev/char.h"
#include "qemu/accel.h"
-#include "system/accel-ops.h"
+#include "accel/dummy-cpus.h"
+#include "accel/accel-ops.h"
+#include "accel/accel-cpu-ops.h"
#include "system/cpus.h"
#include "system/xen.h"
#include "system/runstate.h"
@@ -63,7 +65,7 @@ static void xen_set_igd_gfx_passthru(Object *obj, bool value, Error **errp)
xen_igd_gfx_pt_set(value, errp);
}
-static void xen_setup_post(MachineState *ms, AccelState *accel)
+static void xen_setup_post(AccelState *as)
{
int rc;
@@ -76,7 +78,7 @@ static void xen_setup_post(MachineState *ms, AccelState *accel)
}
}
-static int xen_init(MachineState *ms)
+static int xen_init(AccelState *as, MachineState *ms)
{
MachineClass *mc = MACHINE_GET_CLASS(ms);
@@ -116,7 +118,7 @@ static int xen_init(MachineState *ms)
return 0;
}
-static void xen_accel_class_init(ObjectClass *oc, void *data)
+static void xen_accel_class_init(ObjectClass *oc, const void *data)
{
AccelClass *ac = ACCEL_CLASS(oc);
static GlobalProperty compat[] = {
@@ -147,11 +149,12 @@ static const TypeInfo xen_accel_type = {
.class_init = xen_accel_class_init,
};
-static void xen_accel_ops_class_init(ObjectClass *oc, void *data)
+static void xen_accel_ops_class_init(ObjectClass *oc, const void *data)
{
AccelOpsClass *ops = ACCEL_OPS_CLASS(oc);
ops->create_vcpu_thread = dummy_start_vcpu_thread;
+ ops->handle_interrupt = generic_handle_interrupt;
}
static const TypeInfo xen_accel_ops_type = {