aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.b4-config1
-rw-r--r--MAINTAINERS4
-rw-r--r--Makefile1
-rw-r--r--accel/accel-common.c2
-rw-r--r--accel/accel-system.c7
-rw-r--r--accel/dummy-cpus.c1
-rw-r--r--accel/dummy-cpus.h14
-rw-r--r--accel/hvf/hvf-accel-ops.c314
-rw-r--r--accel/hvf/hvf-all.c265
-rw-r--r--accel/kvm/kvm-accel-ops.c1
-rw-r--r--accel/kvm/kvm-all.c59
-rw-r--r--accel/qtest/qtest.c4
-rw-r--r--accel/stubs/kvm-stub.c9
-rw-r--r--accel/stubs/meson.build2
-rw-r--r--accel/stubs/nvmm-stub.c12
-rw-r--r--accel/stubs/whpx-stub.c12
-rw-r--r--accel/tcg/internal-common.h2
-rw-r--r--accel/tcg/monitor.c51
-rw-r--r--accel/tcg/tcg-accel-ops.c6
-rw-r--r--accel/tcg/tcg-all.c6
-rw-r--r--accel/xen/xen-all.c6
-rw-r--r--backends/iommufd.c107
-rw-r--r--backends/trace-events2
-rw-r--r--bsd-user/main.c2
-rw-r--r--docs/about/deprecated.rst8
-rw-r--r--docs/devel/migration/CPR.rst5
-rw-r--r--docs/system/arm/aspeed.rst4
-rw-r--r--hmp-commands-info.hx14
-rw-r--r--hw/arm/Kconfig1
-rw-r--r--hw/arm/aspeed.c285
-rw-r--r--hw/arm/aspeed_eeprom.c21
-rw-r--r--hw/arm/aspeed_eeprom.h3
-rw-r--r--hw/i386/Kconfig2
-rw-r--r--hw/misc/aspeed_scu.c22
-rw-r--r--hw/misc/aspeed_sdmc.c3
-rw-r--r--hw/vfio-user/container.c11
-rw-r--r--hw/vfio/ap.c4
-rw-r--r--hw/vfio/ccw.c4
-rw-r--r--hw/vfio/container-base.c9
-rw-r--r--hw/vfio/cpr-iommufd.c225
-rw-r--r--hw/vfio/cpr-legacy.c25
-rw-r--r--hw/vfio/cpr.c144
-rw-r--r--hw/vfio/device.c40
-rw-r--r--hw/vfio/helpers.c11
-rw-r--r--hw/vfio/iommufd-stubs.c18
-rw-r--r--hw/vfio/iommufd.c81
-rw-r--r--hw/vfio/meson.build2
-rw-r--r--hw/vfio/pci.c109
-rw-r--r--hw/vfio/pci.h2
-rw-r--r--hw/vfio/platform.c2
-rw-r--r--hw/vfio/trace-events3
-rw-r--r--include/exec/cpu-common.h1
-rw-r--r--include/hw/arm/aspeed.h2
-rw-r--r--include/hw/core/cpu.h3
-rw-r--r--include/hw/vfio/vfio-container-base.h15
-rw-r--r--include/hw/vfio/vfio-cpr.h36
-rw-r--r--include/hw/vfio/vfio-device.h3
-rw-r--r--include/migration/cpr.h14
-rw-r--r--include/qemu/accel.h11
-rw-r--r--include/system/accel-ops.h22
-rw-r--r--include/system/cpus.h5
-rw-r--r--include/system/hvf.h38
-rw-r--r--include/system/hvf_int.h35
-rw-r--r--include/system/hw_accel.h19
-rw-r--r--include/system/iommufd.h7
-rw-r--r--include/system/kvm.h9
-rw-r--r--include/system/nvmm.h23
-rw-r--r--include/system/whpx.h23
-rw-r--r--linux-user/main.c2
-rw-r--r--migration/cpr.c52
-rw-r--r--monitor/hmp-cmds-target.c4
-rw-r--r--qapi/machine.json18
-rw-r--r--qapi/migration.json6
-rw-r--r--system/cpus.c12
-rw-r--r--system/memory.c2
-rw-r--r--system/physmem.c5
-rw-r--r--target/arm/hvf/hvf.c4
-rw-r--r--target/i386/hvf/hvf.c4
-rw-r--r--target/i386/hvf/x86hvf.c2
-rw-r--r--target/i386/nvmm/nvmm-accel-ops.c1
-rw-r--r--target/i386/nvmm/nvmm-all.c31
-rw-r--r--target/i386/whpx/whpx-accel-ops.c1
-rw-r--r--target/i386/whpx/whpx-all.c32
-rw-r--r--tests/functional/aspeed.py9
-rw-r--r--tests/functional/meson.build4
-rwxr-xr-xtests/functional/test_arm_aspeed_catalina.py25
-rw-r--r--tests/functional/test_arm_aspeed_gb200nvl_bmc.py26
-rw-r--r--tests/qtest/aspeed_scu-test.c231
-rw-r--r--tests/qtest/meson.build1
-rw-r--r--tests/qtest/qmp-cmd-test.c1
90 files changed, 2063 insertions, 624 deletions
diff --git a/.b4-config b/.b4-config
index 4b9b2fe..126f503 100644
--- a/.b4-config
+++ b/.b4-config
@@ -11,4 +11,3 @@
prep-perpatch-check-cmd = scripts/checkpatch.pl -q --terse --no-summary --mailback -
searchmask = https://lore.kernel.org/qemu-devel/?x=m&t=1&q=%s
linkmask = https://lore.kernel.org/qemu-devel/%s
- linktrailermask = Message-ID: <%s>
diff --git a/MAINTAINERS b/MAINTAINERS
index 12efc88..1842c3d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -495,6 +495,7 @@ Guest CPU Cores (other accelerators)
Overall
M: Richard Henderson <richard.henderson@linaro.org>
R: Paolo Bonzini <pbonzini@redhat.com>
+R: Philippe Mathieu-Daudé <philmd@linaro.org>
S: Maintained
F: include/exec/cpu*.h
F: include/exec/target_long.h
@@ -503,6 +504,7 @@ F: include/system/accel-*.h
F: include/system/cpus.h
F: include/accel/accel-cpu*.h
F: accel/accel-*.?
+F: accel/dummy-cpus.?
F: accel/Makefile.objs
F: accel/stubs/Makefile.objs
F: cpu-common.c
@@ -540,6 +542,7 @@ WHPX CPUs
M: Sunil Muthuswamy <sunilmut@microsoft.com>
S: Supported
F: target/i386/whpx/
+F: accel/stubs/whpx-stub.c
F: include/system/whpx.h
X86 Instruction Emulator
@@ -586,6 +589,7 @@ NetBSD Virtual Machine Monitor (NVMM) CPU support
M: Reinoud Zandijk <reinoud@netbsd.org>
S: Maintained
F: include/system/nvmm.h
+F: accel/stubs/nvmm-stub.c
F: target/i386/nvmm/
Hosts
diff --git a/Makefile b/Makefile
index c92a3cf..74c2da2 100644
--- a/Makefile
+++ b/Makefile
@@ -227,6 +227,7 @@ distclean: clean recurse-distclean
rm -Rf .sdk qemu-bundle
find-src-path = find "$(SRC_PATH)" -path "$(SRC_PATH)/meson" -prune -o \
+ -path "$(SRC_PATH)/.pc" -prune -o \
-type l -prune -o \( -name "*.[chsS]" -o -name "*.[ch].inc" \)
.PHONY: ctags
diff --git a/accel/accel-common.c b/accel/accel-common.c
index 4894b98..591ff4c 100644
--- a/accel/accel-common.c
+++ b/accel/accel-common.c
@@ -124,7 +124,7 @@ int accel_supported_gdbstub_sstep_flags(void)
AccelState *accel = current_accel();
AccelClass *acc = ACCEL_GET_CLASS(accel);
if (acc->gdbstub_supported_sstep_flags) {
- return acc->gdbstub_supported_sstep_flags();
+ return acc->gdbstub_supported_sstep_flags(accel);
}
return 0;
}
diff --git a/accel/accel-system.c b/accel/accel-system.c
index a0f562a..af713cc 100644
--- a/accel/accel-system.c
+++ b/accel/accel-system.c
@@ -37,7 +37,7 @@ int accel_init_machine(AccelState *accel, MachineState *ms)
int ret;
ms->accelerator = accel;
*(acc->allowed) = true;
- ret = acc->init_machine(ms);
+ ret = acc->init_machine(accel, ms);
if (ret < 0) {
ms->accelerator = NULL;
*(acc->allowed) = false;
@@ -58,7 +58,7 @@ void accel_setup_post(MachineState *ms)
AccelState *accel = ms->accelerator;
AccelClass *acc = ACCEL_GET_CLASS(accel);
if (acc->setup_post) {
- acc->setup_post(ms, accel);
+ acc->setup_post(accel);
}
}
@@ -85,8 +85,9 @@ void accel_init_ops_interfaces(AccelClass *ac)
* non-NULL create_vcpu_thread operation.
*/
ops = ACCEL_OPS_CLASS(oc);
+ ac->ops = ops;
if (ops->ops_init) {
- ops->ops_init(ops);
+ ops->ops_init(ac);
}
cpus_register_accel(ops);
}
diff --git a/accel/dummy-cpus.c b/accel/dummy-cpus.c
index 8672761..03cfc0f 100644
--- a/accel/dummy-cpus.c
+++ b/accel/dummy-cpus.c
@@ -17,6 +17,7 @@
#include "qemu/guest-random.h"
#include "qemu/main-loop.h"
#include "hw/core/cpu.h"
+#include "accel/dummy-cpus.h"
static void *dummy_cpu_thread_fn(void *arg)
{
diff --git a/accel/dummy-cpus.h b/accel/dummy-cpus.h
new file mode 100644
index 0000000..d18dd0f
--- /dev/null
+++ b/accel/dummy-cpus.h
@@ -0,0 +1,14 @@
+/*
+ * Dummy cpu thread code
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef ACCEL_DUMMY_CPUS_H
+#define ACCEL_DUMMY_CPUS_H
+
+void dummy_start_vcpu_thread(CPUState *cpu);
+
+#endif
diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c
index b389772..be8724a 100644
--- a/accel/hvf/hvf-accel-ops.c
+++ b/accel/hvf/hvf-accel-ops.c
@@ -48,18 +48,16 @@
*/
#include "qemu/osdep.h"
-#include "qemu/error-report.h"
+#include "qemu/guest-random.h"
#include "qemu/main-loop.h"
-#include "system/address-spaces.h"
+#include "qemu/queue.h"
#include "gdbstub/enums.h"
-#include "hw/boards.h"
+#include "exec/cpu-common.h"
+#include "hw/core/cpu.h"
#include "system/accel-ops.h"
#include "system/cpus.h"
#include "system/hvf.h"
#include "system/hvf_int.h"
-#include "system/runstate.h"
-#include "qemu/guest-random.h"
-#include "trace.h"
HVFState *hvf_state;
@@ -79,143 +77,17 @@ hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t size)
return NULL;
}
-struct mac_slot {
- int present;
- uint64_t size;
- uint64_t gpa_start;
- uint64_t gva;
-};
-
-struct mac_slot mac_slots[32];
-
-static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags)
-{
- struct mac_slot *macslot;
- hv_return_t ret;
-
- macslot = &mac_slots[slot->slot_id];
-
- if (macslot->present) {
- if (macslot->size != slot->size) {
- macslot->present = 0;
- trace_hvf_vm_unmap(macslot->gpa_start, macslot->size);
- ret = hv_vm_unmap(macslot->gpa_start, macslot->size);
- assert_hvf_ok(ret);
- }
- }
-
- if (!slot->size) {
- return 0;
- }
-
- macslot->present = 1;
- macslot->gpa_start = slot->start;
- macslot->size = slot->size;
- trace_hvf_vm_map(slot->start, slot->size, slot->mem, flags,
- flags & HV_MEMORY_READ ? 'R' : '-',
- flags & HV_MEMORY_WRITE ? 'W' : '-',
- flags & HV_MEMORY_EXEC ? 'E' : '-');
- ret = hv_vm_map(slot->mem, slot->start, slot->size, flags);
- assert_hvf_ok(ret);
- return 0;
-}
-
-static void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
-{
- hvf_slot *mem;
- MemoryRegion *area = section->mr;
- bool writable = !area->readonly && !area->rom_device;
- hv_memory_flags_t flags;
- uint64_t page_size = qemu_real_host_page_size();
-
- if (!memory_region_is_ram(area)) {
- if (writable) {
- return;
- } else if (!memory_region_is_romd(area)) {
- /*
- * If the memory device is not in romd_mode, then we actually want
- * to remove the hvf memory slot so all accesses will trap.
- */
- add = false;
- }
- }
-
- if (!QEMU_IS_ALIGNED(int128_get64(section->size), page_size) ||
- !QEMU_IS_ALIGNED(section->offset_within_address_space, page_size)) {
- /* Not page aligned, so we can not map as RAM */
- add = false;
- }
-
- mem = hvf_find_overlap_slot(
- section->offset_within_address_space,
- int128_get64(section->size));
-
- if (mem && add) {
- if (mem->size == int128_get64(section->size) &&
- mem->start == section->offset_within_address_space &&
- mem->mem == (memory_region_get_ram_ptr(area) +
- section->offset_within_region)) {
- return; /* Same region was attempted to register, go away. */
- }
- }
-
- /* Region needs to be reset. set the size to 0 and remap it. */
- if (mem) {
- mem->size = 0;
- if (do_hvf_set_memory(mem, 0)) {
- error_report("Failed to reset overlapping slot");
- abort();
- }
- }
-
- if (!add) {
- return;
- }
-
- if (area->readonly ||
- (!memory_region_is_ram(area) && memory_region_is_romd(area))) {
- flags = HV_MEMORY_READ | HV_MEMORY_EXEC;
- } else {
- flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC;
- }
-
- /* Now make a new slot. */
- int x;
-
- for (x = 0; x < hvf_state->num_slots; ++x) {
- mem = &hvf_state->slots[x];
- if (!mem->size) {
- break;
- }
- }
-
- if (x == hvf_state->num_slots) {
- error_report("No free slots");
- abort();
- }
-
- mem->size = int128_get64(section->size);
- mem->mem = memory_region_get_ram_ptr(area) + section->offset_within_region;
- mem->start = section->offset_within_address_space;
- mem->region = area;
-
- if (do_hvf_set_memory(mem, flags)) {
- error_report("Error registering new memory slot");
- abort();
- }
-}
-
static void do_hvf_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
- if (!cpu->accel->dirty) {
+ if (!cpu->vcpu_dirty) {
hvf_get_registers(cpu);
- cpu->accel->dirty = true;
+ cpu->vcpu_dirty = true;
}
}
static void hvf_cpu_synchronize_state(CPUState *cpu)
{
- if (!cpu->accel->dirty) {
+ if (!cpu->vcpu_dirty) {
run_on_cpu(cpu, do_hvf_cpu_synchronize_state, RUN_ON_CPU_NULL);
}
}
@@ -224,7 +96,7 @@ static void do_hvf_cpu_synchronize_set_dirty(CPUState *cpu,
run_on_cpu_data arg)
{
/* QEMU state is the reference, push it to HVF now and on next entry */
- cpu->accel->dirty = true;
+ cpu->vcpu_dirty = true;
}
static void hvf_cpu_synchronize_post_reset(CPUState *cpu)
@@ -242,147 +114,10 @@ static void hvf_cpu_synchronize_pre_loadvm(CPUState *cpu)
run_on_cpu(cpu, do_hvf_cpu_synchronize_set_dirty, RUN_ON_CPU_NULL);
}
-static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on)
-{
- hvf_slot *slot;
-
- slot = hvf_find_overlap_slot(
- section->offset_within_address_space,
- int128_get64(section->size));
-
- /* protect region against writes; begin tracking it */
- if (on) {
- slot->flags |= HVF_SLOT_LOG;
- hv_vm_protect((uintptr_t)slot->start, (size_t)slot->size,
- HV_MEMORY_READ | HV_MEMORY_EXEC);
- /* stop tracking region*/
- } else {
- slot->flags &= ~HVF_SLOT_LOG;
- hv_vm_protect((uintptr_t)slot->start, (size_t)slot->size,
- HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC);
- }
-}
-
-static void hvf_log_start(MemoryListener *listener,
- MemoryRegionSection *section, int old, int new)
-{
- if (old != 0) {
- return;
- }
-
- hvf_set_dirty_tracking(section, 1);
-}
-
-static void hvf_log_stop(MemoryListener *listener,
- MemoryRegionSection *section, int old, int new)
-{
- if (new != 0) {
- return;
- }
-
- hvf_set_dirty_tracking(section, 0);
-}
-
-static void hvf_log_sync(MemoryListener *listener,
- MemoryRegionSection *section)
-{
- /*
- * sync of dirty pages is handled elsewhere; just make sure we keep
- * tracking the region.
- */
- hvf_set_dirty_tracking(section, 1);
-}
-
-static void hvf_region_add(MemoryListener *listener,
- MemoryRegionSection *section)
-{
- hvf_set_phys_mem(section, true);
-}
-
-static void hvf_region_del(MemoryListener *listener,
- MemoryRegionSection *section)
-{
- hvf_set_phys_mem(section, false);
-}
-
-static MemoryListener hvf_memory_listener = {
- .name = "hvf",
- .priority = MEMORY_LISTENER_PRIORITY_ACCEL,
- .region_add = hvf_region_add,
- .region_del = hvf_region_del,
- .log_start = hvf_log_start,
- .log_stop = hvf_log_stop,
- .log_sync = hvf_log_sync,
-};
-
static void dummy_signal(int sig)
{
}
-bool hvf_allowed;
-
-static int hvf_accel_init(MachineState *ms)
-{
- int x;
- hv_return_t ret;
- HVFState *s;
- int pa_range = 36;
- MachineClass *mc = MACHINE_GET_CLASS(ms);
-
- if (mc->hvf_get_physical_address_range) {
- pa_range = mc->hvf_get_physical_address_range(ms);
- if (pa_range < 0) {
- return -EINVAL;
- }
- }
-
- ret = hvf_arch_vm_create(ms, (uint32_t)pa_range);
- assert_hvf_ok(ret);
-
- s = g_new0(HVFState, 1);
-
- s->num_slots = ARRAY_SIZE(s->slots);
- for (x = 0; x < s->num_slots; ++x) {
- s->slots[x].size = 0;
- s->slots[x].slot_id = x;
- }
-
- QTAILQ_INIT(&s->hvf_sw_breakpoints);
-
- hvf_state = s;
- memory_listener_register(&hvf_memory_listener, &address_space_memory);
-
- return hvf_arch_init();
-}
-
-static inline int hvf_gdbstub_sstep_flags(void)
-{
- return SSTEP_ENABLE | SSTEP_NOIRQ;
-}
-
-static void hvf_accel_class_init(ObjectClass *oc, const void *data)
-{
- AccelClass *ac = ACCEL_CLASS(oc);
- ac->name = "HVF";
- ac->init_machine = hvf_accel_init;
- ac->allowed = &hvf_allowed;
- ac->gdbstub_supported_sstep_flags = hvf_gdbstub_sstep_flags;
-}
-
-static const TypeInfo hvf_accel_type = {
- .name = TYPE_HVF_ACCEL,
- .parent = TYPE_ACCEL,
- .instance_size = sizeof(HVFState),
- .class_init = hvf_accel_class_init,
-};
-
-static void hvf_type_init(void)
-{
- type_register_static(&hvf_accel_type);
-}
-
-type_init(hvf_type_init);
-
static void hvf_vcpu_destroy(CPUState *cpu)
{
hv_return_t ret = hv_vcpu_destroy(cpu->accel->fd);
@@ -415,8 +150,8 @@ static int hvf_init_vcpu(CPUState *cpu)
#else
r = hv_vcpu_create(&cpu->accel->fd, HV_VCPU_DEFAULT);
#endif
- cpu->accel->dirty = true;
assert_hvf_ok(r);
+ cpu->vcpu_dirty = true;
cpu->accel->guest_debug_enabled = false;
@@ -482,6 +217,34 @@ static void hvf_start_vcpu_thread(CPUState *cpu)
cpu, QEMU_THREAD_JOINABLE);
}
+struct hvf_sw_breakpoint *hvf_find_sw_breakpoint(CPUState *cpu, vaddr pc)
+{
+ struct hvf_sw_breakpoint *bp;
+
+ QTAILQ_FOREACH(bp, &hvf_state->hvf_sw_breakpoints, entry) {
+ if (bp->pc == pc) {
+ return bp;
+ }
+ }
+ return NULL;
+}
+
+int hvf_sw_breakpoints_active(CPUState *cpu)
+{
+ return !QTAILQ_EMPTY(&hvf_state->hvf_sw_breakpoints);
+}
+
+static void do_hvf_update_guest_debug(CPUState *cpu, run_on_cpu_data arg)
+{
+ hvf_arch_update_guest_debug(cpu);
+}
+
+int hvf_update_guest_debug(CPUState *cpu)
+{
+ run_on_cpu(cpu, do_hvf_update_guest_debug, RUN_ON_CPU_NULL);
+ return 0;
+}
+
static int hvf_insert_breakpoint(CPUState *cpu, int type, vaddr addr, vaddr len)
{
struct hvf_sw_breakpoint *bp;
@@ -590,6 +353,7 @@ static void hvf_accel_ops_class_init(ObjectClass *oc, const void *data)
ops->create_vcpu_thread = hvf_start_vcpu_thread;
ops->kick_vcpu_thread = hvf_kick_vcpu_thread;
+ ops->handle_interrupt = generic_handle_interrupt;
ops->synchronize_post_reset = hvf_cpu_synchronize_post_reset;
ops->synchronize_post_init = hvf_cpu_synchronize_post_init;
@@ -609,8 +373,10 @@ static const TypeInfo hvf_accel_ops_type = {
.class_init = hvf_accel_ops_class_init,
.abstract = true,
};
+
static void hvf_accel_ops_register_types(void)
{
type_register_static(&hvf_accel_ops_type);
}
+
type_init(hvf_accel_ops_register_types);
diff --git a/accel/hvf/hvf-all.c b/accel/hvf/hvf-all.c
index 8c387fd..b6075c0 100644
--- a/accel/hvf/hvf-all.c
+++ b/accel/hvf/hvf-all.c
@@ -10,9 +10,24 @@
#include "qemu/osdep.h"
#include "qemu/error-report.h"
+#include "system/address-spaces.h"
+#include "system/memory.h"
#include "system/hvf.h"
#include "system/hvf_int.h"
#include "hw/core/cpu.h"
+#include "hw/boards.h"
+#include "trace.h"
+
+bool hvf_allowed;
+
+struct mac_slot {
+ int present;
+ uint64_t size;
+ uint64_t gpa_start;
+ uint64_t gva;
+};
+
+struct mac_slot mac_slots[32];
const char *hvf_return_string(hv_return_t ret)
{
@@ -42,30 +57,254 @@ void assert_hvf_ok_impl(hv_return_t ret, const char *file, unsigned int line,
abort();
}
-struct hvf_sw_breakpoint *hvf_find_sw_breakpoint(CPUState *cpu, vaddr pc)
+static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags)
{
- struct hvf_sw_breakpoint *bp;
+ struct mac_slot *macslot;
+ hv_return_t ret;
- QTAILQ_FOREACH(bp, &hvf_state->hvf_sw_breakpoints, entry) {
- if (bp->pc == pc) {
- return bp;
+ macslot = &mac_slots[slot->slot_id];
+
+ if (macslot->present) {
+ if (macslot->size != slot->size) {
+ macslot->present = 0;
+ trace_hvf_vm_unmap(macslot->gpa_start, macslot->size);
+ ret = hv_vm_unmap(macslot->gpa_start, macslot->size);
+ assert_hvf_ok(ret);
}
}
- return NULL;
+
+ if (!slot->size) {
+ return 0;
+ }
+
+ macslot->present = 1;
+ macslot->gpa_start = slot->start;
+ macslot->size = slot->size;
+ trace_hvf_vm_map(slot->start, slot->size, slot->mem, flags,
+ flags & HV_MEMORY_READ ? 'R' : '-',
+ flags & HV_MEMORY_WRITE ? 'W' : '-',
+ flags & HV_MEMORY_EXEC ? 'E' : '-');
+ ret = hv_vm_map(slot->mem, slot->start, slot->size, flags);
+ assert_hvf_ok(ret);
+ return 0;
}
-int hvf_sw_breakpoints_active(CPUState *cpu)
+static void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
{
- return !QTAILQ_EMPTY(&hvf_state->hvf_sw_breakpoints);
+ hvf_slot *mem;
+ MemoryRegion *area = section->mr;
+ bool writable = !area->readonly && !area->rom_device;
+ hv_memory_flags_t flags;
+ uint64_t page_size = qemu_real_host_page_size();
+
+ if (!memory_region_is_ram(area)) {
+ if (writable) {
+ return;
+ } else if (!memory_region_is_romd(area)) {
+ /*
+ * If the memory device is not in romd_mode, then we actually want
+ * to remove the hvf memory slot so all accesses will trap.
+ */
+ add = false;
+ }
+ }
+
+ if (!QEMU_IS_ALIGNED(int128_get64(section->size), page_size) ||
+ !QEMU_IS_ALIGNED(section->offset_within_address_space, page_size)) {
+ /* Not page aligned, so we can not map as RAM */
+ add = false;
+ }
+
+ mem = hvf_find_overlap_slot(
+ section->offset_within_address_space,
+ int128_get64(section->size));
+
+ if (mem && add) {
+ if (mem->size == int128_get64(section->size) &&
+ mem->start == section->offset_within_address_space &&
+ mem->mem == (memory_region_get_ram_ptr(area) +
+ section->offset_within_region)) {
+ return; /* Same region was attempted to register, go away. */
+ }
+ }
+
+ /* Region needs to be reset. set the size to 0 and remap it. */
+ if (mem) {
+ mem->size = 0;
+ if (do_hvf_set_memory(mem, 0)) {
+ error_report("Failed to reset overlapping slot");
+ abort();
+ }
+ }
+
+ if (!add) {
+ return;
+ }
+
+ if (area->readonly ||
+ (!memory_region_is_ram(area) && memory_region_is_romd(area))) {
+ flags = HV_MEMORY_READ | HV_MEMORY_EXEC;
+ } else {
+ flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC;
+ }
+
+ /* Now make a new slot. */
+ int x;
+
+ for (x = 0; x < hvf_state->num_slots; ++x) {
+ mem = &hvf_state->slots[x];
+ if (!mem->size) {
+ break;
+ }
+ }
+
+ if (x == hvf_state->num_slots) {
+ error_report("No free slots");
+ abort();
+ }
+
+ mem->size = int128_get64(section->size);
+ mem->mem = memory_region_get_ram_ptr(area) + section->offset_within_region;
+ mem->start = section->offset_within_address_space;
+ mem->region = area;
+
+ if (do_hvf_set_memory(mem, flags)) {
+ error_report("Error registering new memory slot");
+ abort();
+ }
}
-static void do_hvf_update_guest_debug(CPUState *cpu, run_on_cpu_data arg)
+static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on)
{
- hvf_arch_update_guest_debug(cpu);
+ hvf_slot *slot;
+
+ slot = hvf_find_overlap_slot(
+ section->offset_within_address_space,
+ int128_get64(section->size));
+
+ /* protect region against writes; begin tracking it */
+ if (on) {
+ slot->flags |= HVF_SLOT_LOG;
+ hv_vm_protect((uintptr_t)slot->start, (size_t)slot->size,
+ HV_MEMORY_READ | HV_MEMORY_EXEC);
+ /* stop tracking region*/
+ } else {
+ slot->flags &= ~HVF_SLOT_LOG;
+ hv_vm_protect((uintptr_t)slot->start, (size_t)slot->size,
+ HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC);
+ }
}
-int hvf_update_guest_debug(CPUState *cpu)
+static void hvf_log_start(MemoryListener *listener,
+ MemoryRegionSection *section, int old, int new)
{
- run_on_cpu(cpu, do_hvf_update_guest_debug, RUN_ON_CPU_NULL);
- return 0;
+ if (old != 0) {
+ return;
+ }
+
+ hvf_set_dirty_tracking(section, 1);
}
+
+static void hvf_log_stop(MemoryListener *listener,
+ MemoryRegionSection *section, int old, int new)
+{
+ if (new != 0) {
+ return;
+ }
+
+ hvf_set_dirty_tracking(section, 0);
+}
+
+static void hvf_log_sync(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ /*
+ * sync of dirty pages is handled elsewhere; just make sure we keep
+ * tracking the region.
+ */
+ hvf_set_dirty_tracking(section, 1);
+}
+
+static void hvf_region_add(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ hvf_set_phys_mem(section, true);
+}
+
+static void hvf_region_del(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ hvf_set_phys_mem(section, false);
+}
+
+static MemoryListener hvf_memory_listener = {
+ .name = "hvf",
+ .priority = MEMORY_LISTENER_PRIORITY_ACCEL,
+ .region_add = hvf_region_add,
+ .region_del = hvf_region_del,
+ .log_start = hvf_log_start,
+ .log_stop = hvf_log_stop,
+ .log_sync = hvf_log_sync,
+};
+
+static int hvf_accel_init(AccelState *as, MachineState *ms)
+{
+ int x;
+ hv_return_t ret;
+ HVFState *s;
+ int pa_range = 36;
+ MachineClass *mc = MACHINE_GET_CLASS(ms);
+
+ if (mc->hvf_get_physical_address_range) {
+ pa_range = mc->hvf_get_physical_address_range(ms);
+ if (pa_range < 0) {
+ return -EINVAL;
+ }
+ }
+
+ ret = hvf_arch_vm_create(ms, (uint32_t)pa_range);
+ assert_hvf_ok(ret);
+
+ s = g_new0(HVFState, 1);
+
+ s->num_slots = ARRAY_SIZE(s->slots);
+ for (x = 0; x < s->num_slots; ++x) {
+ s->slots[x].size = 0;
+ s->slots[x].slot_id = x;
+ }
+
+ QTAILQ_INIT(&s->hvf_sw_breakpoints);
+
+ hvf_state = s;
+ memory_listener_register(&hvf_memory_listener, &address_space_memory);
+
+ return hvf_arch_init();
+}
+
+static int hvf_gdbstub_sstep_flags(AccelState *as)
+{
+ return SSTEP_ENABLE | SSTEP_NOIRQ;
+}
+
+static void hvf_accel_class_init(ObjectClass *oc, const void *data)
+{
+ AccelClass *ac = ACCEL_CLASS(oc);
+ ac->name = "HVF";
+ ac->init_machine = hvf_accel_init;
+ ac->allowed = &hvf_allowed;
+ ac->gdbstub_supported_sstep_flags = hvf_gdbstub_sstep_flags;
+}
+
+static const TypeInfo hvf_accel_type = {
+ .name = TYPE_HVF_ACCEL,
+ .parent = TYPE_ACCEL,
+ .instance_size = sizeof(HVFState),
+ .class_init = hvf_accel_class_init,
+};
+
+static void hvf_type_init(void)
+{
+ type_register_static(&hvf_accel_type);
+}
+
+type_init(hvf_type_init);
diff --git a/accel/kvm/kvm-accel-ops.c b/accel/kvm/kvm-accel-ops.c
index e5c1544..0eafc90 100644
--- a/accel/kvm/kvm-accel-ops.c
+++ b/accel/kvm/kvm-accel-ops.c
@@ -101,6 +101,7 @@ static void kvm_accel_ops_class_init(ObjectClass *oc, const void *data)
ops->synchronize_post_init = kvm_cpu_synchronize_post_init;
ops->synchronize_state = kvm_cpu_synchronize_state;
ops->synchronize_pre_loadvm = kvm_cpu_synchronize_pre_loadvm;
+ ops->handle_interrupt = generic_handle_interrupt;
#ifdef TARGET_KVM_HAVE_GUEST_DEBUG
ops->update_guest_debug = kvm_update_guest_debug_ops;
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index d095d1b..a106d1b 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -453,7 +453,13 @@ static void kvm_reset_parked_vcpus(KVMState *s)
}
}
-int kvm_create_vcpu(CPUState *cpu)
+/**
+ * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU
+ * @cpu: QOM CPUState object for which KVM vCPU has to be fetched/created.
+ *
+ * @returns: 0 when success, errno (<0) when failed.
+ */
+static int kvm_create_vcpu(CPUState *cpu)
{
unsigned long vcpu_id = kvm_arch_vcpu_id(cpu);
KVMState *s = kvm_state;
@@ -515,16 +521,23 @@ static int do_kvm_destroy_vcpu(CPUState *cpu)
goto err;
}
+ /* If I am the CPU that created coalesced_mmio_ring, then discard it */
+ if (s->coalesced_mmio_ring == (void *)cpu->kvm_run + PAGE_SIZE) {
+ s->coalesced_mmio_ring = NULL;
+ }
+
ret = munmap(cpu->kvm_run, mmap_size);
if (ret < 0) {
goto err;
}
+ cpu->kvm_run = NULL;
if (cpu->kvm_dirty_gfns) {
ret = munmap(cpu->kvm_dirty_gfns, s->kvm_dirty_ring_bytes);
if (ret < 0) {
goto err;
}
+ cpu->kvm_dirty_gfns = NULL;
}
kvm_park_vcpu(cpu);
@@ -608,6 +621,31 @@ err:
return ret;
}
+void kvm_close(void)
+{
+ CPUState *cpu;
+
+ if (!kvm_state || kvm_state->fd == -1) {
+ return;
+ }
+
+ CPU_FOREACH(cpu) {
+ cpu_remove_sync(cpu);
+ close(cpu->kvm_fd);
+ cpu->kvm_fd = -1;
+ close(cpu->kvm_vcpu_stats_fd);
+ cpu->kvm_vcpu_stats_fd = -1;
+ }
+
+ if (kvm_state && kvm_state->fd != -1) {
+ close(kvm_state->vmfd);
+ kvm_state->vmfd = -1;
+ close(kvm_state->fd);
+ kvm_state->fd = -1;
+ }
+ kvm_state = NULL;
+}
+
/*
* dirty pages logging control
*/
@@ -2464,13 +2502,10 @@ uint32_t kvm_dirty_ring_size(void)
return kvm_state->kvm_dirty_ring_size;
}
-static int do_kvm_create_vm(MachineState *ms, int type)
+static int do_kvm_create_vm(KVMState *s, int type)
{
- KVMState *s;
int ret;
- s = KVM_STATE(ms->accelerator);
-
do {
ret = kvm_ioctl(s, KVM_CREATE_VM, type);
} while (ret == -EINTR);
@@ -2567,7 +2602,7 @@ static int kvm_setup_dirty_ring(KVMState *s)
return 0;
}
-static int kvm_init(MachineState *ms)
+static int kvm_init(AccelState *as, MachineState *ms)
{
MachineClass *mc = MACHINE_GET_CLASS(ms);
static const char upgrade_note[] =
@@ -2582,15 +2617,13 @@ static int kvm_init(MachineState *ms)
{ /* end of list */ }
}, *nc = num_cpus;
int soft_vcpus_limit, hard_vcpus_limit;
- KVMState *s;
+ KVMState *s = KVM_STATE(as);
const KVMCapabilityInfo *missing_cap;
int ret;
int type;
qemu_mutex_init(&kml_slots_lock);
- s = KVM_STATE(ms->accelerator);
-
/*
* On systems where the kernel can support different base page
* sizes, host page size may be different from TARGET_PAGE_SIZE,
@@ -2642,7 +2675,7 @@ static int kvm_init(MachineState *ms)
goto err;
}
- ret = do_kvm_create_vm(ms, type);
+ ret = do_kvm_create_vm(s, type);
if (ret < 0) {
goto err;
}
@@ -3785,10 +3818,10 @@ int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target)
return r;
}
-static bool kvm_accel_has_memory(MachineState *ms, AddressSpace *as,
+static bool kvm_accel_has_memory(AccelState *accel, AddressSpace *as,
hwaddr start_addr, hwaddr size)
{
- KVMState *kvm = KVM_STATE(ms->accelerator);
+ KVMState *kvm = KVM_STATE(accel);
int i;
for (i = 0; i < kvm->nr_as; ++i) {
@@ -3979,7 +4012,7 @@ static void kvm_accel_instance_init(Object *obj)
* Returns: SSTEP_* flags that KVM supports for guest debug. The
* support is probed during kvm_init()
*/
-static int kvm_gdbstub_sstep_flags(void)
+static int kvm_gdbstub_sstep_flags(AccelState *as)
{
return kvm_sstep_flags;
}
diff --git a/accel/qtest/qtest.c b/accel/qtest/qtest.c
index 92bed92..2b83126 100644
--- a/accel/qtest/qtest.c
+++ b/accel/qtest/qtest.c
@@ -24,6 +24,7 @@
#include "qemu/guest-random.h"
#include "qemu/main-loop.h"
#include "hw/core/cpu.h"
+#include "accel/dummy-cpus.h"
static int64_t qtest_clock_counter;
@@ -37,7 +38,7 @@ static void qtest_set_virtual_clock(int64_t count)
qatomic_set_i64(&qtest_clock_counter, count);
}
-static int qtest_init_accel(MachineState *ms)
+static int qtest_init_accel(AccelState *as, MachineState *ms)
{
return 0;
}
@@ -66,6 +67,7 @@ static void qtest_accel_ops_class_init(ObjectClass *oc, const void *data)
ops->create_vcpu_thread = dummy_start_vcpu_thread;
ops->get_virtual_clock = qtest_get_virtual_clock;
ops->set_virtual_clock = qtest_set_virtual_clock;
+ ops->handle_interrupt = generic_handle_interrupt;
};
static const TypeInfo qtest_accel_ops_type = {
diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c
index ecfd763..68cd33b 100644
--- a/accel/stubs/kvm-stub.c
+++ b/accel/stubs/kvm-stub.c
@@ -29,10 +29,6 @@ void kvm_flush_coalesced_mmio_buffer(void)
{
}
-void kvm_cpu_synchronize_state(CPUState *cpu)
-{
-}
-
bool kvm_has_sync_mmu(void)
{
return false;
@@ -105,11 +101,6 @@ unsigned int kvm_get_free_memslots(void)
return 0;
}
-void kvm_init_cpu_signals(CPUState *cpu)
-{
- abort();
-}
-
bool kvm_arm_supports_user_irq(void)
{
return false;
diff --git a/accel/stubs/meson.build b/accel/stubs/meson.build
index 8ca1a45..9dfc4f9 100644
--- a/accel/stubs/meson.build
+++ b/accel/stubs/meson.build
@@ -3,5 +3,7 @@ system_stubs_ss.add(when: 'CONFIG_XEN', if_false: files('xen-stub.c'))
system_stubs_ss.add(when: 'CONFIG_KVM', if_false: files('kvm-stub.c'))
system_stubs_ss.add(when: 'CONFIG_TCG', if_false: files('tcg-stub.c'))
system_stubs_ss.add(when: 'CONFIG_HVF', if_false: files('hvf-stub.c'))
+system_stubs_ss.add(when: 'CONFIG_NVMM', if_false: files('nvmm-stub.c'))
+system_stubs_ss.add(when: 'CONFIG_WHPX', if_false: files('whpx-stub.c'))
specific_ss.add_all(when: ['CONFIG_SYSTEM_ONLY'], if_true: system_stubs_ss)
diff --git a/accel/stubs/nvmm-stub.c b/accel/stubs/nvmm-stub.c
new file mode 100644
index 0000000..ec14837
--- /dev/null
+++ b/accel/stubs/nvmm-stub.c
@@ -0,0 +1,12 @@
+/*
+ * NVMM stubs for QEMU
+ *
+ * Copyright (c) Linaro
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "system/nvmm.h"
+
+bool nvmm_allowed;
diff --git a/accel/stubs/whpx-stub.c b/accel/stubs/whpx-stub.c
new file mode 100644
index 0000000..c564c89
--- /dev/null
+++ b/accel/stubs/whpx-stub.c
@@ -0,0 +1,12 @@
+/*
+ * WHPX stubs for QEMU
+ *
+ * Copyright (c) Linaro
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "system/whpx.h"
+
+bool whpx_allowed;
diff --git a/accel/tcg/internal-common.h b/accel/tcg/internal-common.h
index 1dbc45d..77a3a06 100644
--- a/accel/tcg/internal-common.h
+++ b/accel/tcg/internal-common.h
@@ -139,4 +139,6 @@ G_NORETURN void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr);
void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr);
+void tcg_dump_stats(GString *buf);
+
#endif
diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
index 1c182b6..e7ed728 100644
--- a/accel/tcg/monitor.c
+++ b/accel/tcg/monitor.c
@@ -141,16 +141,26 @@ static void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
*pelide = elide;
}
-static void tcg_dump_info(GString *buf)
+static void tcg_dump_flush_info(GString *buf)
{
- g_string_append_printf(buf, "[TCG profiler not compiled]\n");
+ size_t flush_full, flush_part, flush_elide;
+
+ g_string_append_printf(buf, "TB flush count %u\n",
+ qatomic_read(&tb_ctx.tb_flush_count));
+ g_string_append_printf(buf, "TB invalidate count %u\n",
+ qatomic_read(&tb_ctx.tb_phys_invalidate_count));
+
+ tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
+ g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full);
+ g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
+ g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide);
}
static void dump_exec_info(GString *buf)
{
struct tb_tree_stats tst = {};
struct qht_stats hst;
- size_t nb_tbs, flush_full, flush_part, flush_elide;
+ size_t nb_tbs;
tcg_tb_foreach(tb_tree_stats_iter, &tst);
nb_tbs = tst.nb_tbs;
@@ -187,50 +197,26 @@ static void dump_exec_info(GString *buf)
qht_statistics_destroy(&hst);
g_string_append_printf(buf, "\nStatistics:\n");
- g_string_append_printf(buf, "TB flush count %u\n",
- qatomic_read(&tb_ctx.tb_flush_count));
- g_string_append_printf(buf, "TB invalidate count %u\n",
- qatomic_read(&tb_ctx.tb_phys_invalidate_count));
-
- tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
- g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full);
- g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
- g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide);
- tcg_dump_info(buf);
+ tcg_dump_flush_info(buf);
}
-HumanReadableText *qmp_x_query_jit(Error **errp)
+void tcg_dump_stats(GString *buf)
{
- g_autoptr(GString) buf = g_string_new("");
-
- if (!tcg_enabled()) {
- error_setg(errp, "JIT information is only available with accel=tcg");
- return NULL;
- }
-
dump_accel_info(buf);
dump_exec_info(buf);
dump_drift_info(buf);
-
- return human_readable_text_from_str(buf);
-}
-
-static void tcg_dump_op_count(GString *buf)
-{
- g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
-HumanReadableText *qmp_x_query_opcount(Error **errp)
+HumanReadableText *qmp_x_query_jit(Error **errp)
{
g_autoptr(GString) buf = g_string_new("");
if (!tcg_enabled()) {
- error_setg(errp,
- "Opcode count information is only available with accel=tcg");
+ error_setg(errp, "JIT information is only available with accel=tcg");
return NULL;
}
- tcg_dump_op_count(buf);
+ tcg_dump_stats(buf);
return human_readable_text_from_str(buf);
}
@@ -238,7 +224,6 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
static void hmp_tcg_register(void)
{
monitor_register_hmp_info_hrt("jit", qmp_x_query_jit);
- monitor_register_hmp_info_hrt("opcount", qmp_x_query_opcount);
}
type_init(hmp_tcg_register);
diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c
index b24d6a7..37b4b21 100644
--- a/accel/tcg/tcg-accel-ops.c
+++ b/accel/tcg/tcg-accel-ops.c
@@ -93,8 +93,6 @@ static void tcg_cpu_reset_hold(CPUState *cpu)
/* mask must never be zero, except for A20 change call */
void tcg_handle_interrupt(CPUState *cpu, int mask)
{
- g_assert(bql_locked());
-
cpu->interrupt_request |= mask;
/*
@@ -198,8 +196,10 @@ static inline void tcg_remove_all_breakpoints(CPUState *cpu)
cpu_watchpoint_remove_all(cpu, BP_GDB);
}
-static void tcg_accel_ops_init(AccelOpsClass *ops)
+static void tcg_accel_ops_init(AccelClass *ac)
{
+ AccelOpsClass *ops = ac->ops;
+
if (qemu_tcg_mttcg_enabled()) {
ops->create_vcpu_thread = mttcg_start_vcpu_thread;
ops->kick_vcpu_thread = mttcg_kick_vcpu_thread;
diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c
index 6e5dc33..5904582 100644
--- a/accel/tcg/tcg-all.c
+++ b/accel/tcg/tcg-all.c
@@ -80,9 +80,9 @@ static void tcg_accel_instance_init(Object *obj)
bool one_insn_per_tb;
-static int tcg_init_machine(MachineState *ms)
+static int tcg_init_machine(AccelState *as, MachineState *ms)
{
- TCGState *s = TCG_STATE(current_accel());
+ TCGState *s = TCG_STATE(as);
unsigned max_threads = 1;
#ifndef CONFIG_USER_ONLY
@@ -219,7 +219,7 @@ static void tcg_set_one_insn_per_tb(Object *obj, bool value, Error **errp)
qatomic_set(&one_insn_per_tb, value);
}
-static int tcg_gdbstub_supported_sstep_flags(void)
+static int tcg_gdbstub_supported_sstep_flags(AccelState *as)
{
/*
* In replay mode all events will come from the log and can't be
diff --git a/accel/xen/xen-all.c b/accel/xen/xen-all.c
index de52a8f..bd0ff64 100644
--- a/accel/xen/xen-all.c
+++ b/accel/xen/xen-all.c
@@ -18,6 +18,7 @@
#include "hw/xen/xen_igd.h"
#include "chardev/char.h"
#include "qemu/accel.h"
+#include "accel/dummy-cpus.h"
#include "system/accel-ops.h"
#include "system/cpus.h"
#include "system/xen.h"
@@ -63,7 +64,7 @@ static void xen_set_igd_gfx_passthru(Object *obj, bool value, Error **errp)
xen_igd_gfx_pt_set(value, errp);
}
-static void xen_setup_post(MachineState *ms, AccelState *accel)
+static void xen_setup_post(AccelState *as)
{
int rc;
@@ -76,7 +77,7 @@ static void xen_setup_post(MachineState *ms, AccelState *accel)
}
}
-static int xen_init(MachineState *ms)
+static int xen_init(AccelState *as, MachineState *ms)
{
MachineClass *mc = MACHINE_GET_CLASS(ms);
@@ -152,6 +153,7 @@ static void xen_accel_ops_class_init(ObjectClass *oc, const void *data)
AccelOpsClass *ops = ACCEL_OPS_CLASS(oc);
ops->create_vcpu_thread = dummy_start_vcpu_thread;
+ ops->handle_interrupt = generic_handle_interrupt;
}
static const TypeInfo xen_accel_ops_type = {
diff --git a/backends/iommufd.c b/backends/iommufd.c
index c2c47ab..2a33c7a 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -16,12 +16,18 @@
#include "qemu/module.h"
#include "qom/object_interfaces.h"
#include "qemu/error-report.h"
+#include "migration/cpr.h"
#include "monitor/monitor.h"
#include "trace.h"
#include "hw/vfio/vfio-device.h"
#include <sys/ioctl.h>
#include <linux/iommufd.h>
+static const char *iommufd_fd_name(IOMMUFDBackend *be)
+{
+ return object_get_canonical_path_component(OBJECT(be));
+}
+
static void iommufd_backend_init(Object *obj)
{
IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
@@ -64,26 +70,73 @@ static bool iommufd_backend_can_be_deleted(UserCreatable *uc)
return !be->users;
}
+static void iommufd_backend_complete(UserCreatable *uc, Error **errp)
+{
+ IOMMUFDBackend *be = IOMMUFD_BACKEND(uc);
+ const char *name = iommufd_fd_name(be);
+
+ if (!be->owned) {
+ /* fd came from the command line. Fetch updated value from cpr state. */
+ if (cpr_is_incoming()) {
+ be->fd = cpr_find_fd(name, 0);
+ } else {
+ cpr_save_fd(name, 0, be->fd);
+ }
+ }
+}
+
static void iommufd_backend_class_init(ObjectClass *oc, const void *data)
{
UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
ucc->can_be_deleted = iommufd_backend_can_be_deleted;
+ ucc->complete = iommufd_backend_complete;
object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd);
}
+bool iommufd_change_process_capable(IOMMUFDBackend *be)
+{
+ struct iommu_ioas_change_process args = {.size = sizeof(args)};
+
+ /*
+ * Call IOMMU_IOAS_CHANGE_PROCESS to verify it is a recognized ioctl.
+ * This is a no-op if the process has not changed since DMA was mapped.
+ */
+ return !ioctl(be->fd, IOMMU_IOAS_CHANGE_PROCESS, &args);
+}
+
+bool iommufd_change_process(IOMMUFDBackend *be, Error **errp)
+{
+ struct iommu_ioas_change_process args = {.size = sizeof(args)};
+ bool ret = !ioctl(be->fd, IOMMU_IOAS_CHANGE_PROCESS, &args);
+
+ if (!ret) {
+ error_setg_errno(errp, errno, "IOMMU_IOAS_CHANGE_PROCESS fd %d failed",
+ be->fd);
+ }
+ trace_iommufd_change_process(be->fd, ret);
+ return ret;
+}
+
bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp)
{
int fd;
if (be->owned && !be->users) {
- fd = qemu_open("/dev/iommu", O_RDWR, errp);
+ fd = cpr_open_fd("/dev/iommu", O_RDWR, iommufd_fd_name(be), 0, errp);
if (fd < 0) {
return false;
}
be->fd = fd;
}
+ if (!be->users && !vfio_iommufd_cpr_register_iommufd(be, errp)) {
+ if (be->owned) {
+ close(be->fd);
+ be->fd = -1;
+ }
+ return false;
+ }
be->users++;
trace_iommufd_backend_connect(be->fd, be->owned, be->users);
@@ -96,9 +149,13 @@ void iommufd_backend_disconnect(IOMMUFDBackend *be)
goto out;
}
be->users--;
- if (!be->users && be->owned) {
- close(be->fd);
- be->fd = -1;
+ if (!be->users) {
+ vfio_iommufd_cpr_unregister_iommufd(be);
+ if (be->owned) {
+ cpr_delete_fd(iommufd_fd_name(be), 0);
+ close(be->fd);
+ be->fd = -1;
+ }
}
out:
trace_iommufd_backend_disconnect(be->fd, be->users);
@@ -172,6 +229,44 @@ int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova,
return ret;
}
+int iommufd_backend_map_file_dma(IOMMUFDBackend *be, uint32_t ioas_id,
+ hwaddr iova, ram_addr_t size,
+ int mfd, unsigned long start, bool readonly)
+{
+ int ret, fd = be->fd;
+ struct iommu_ioas_map_file map = {
+ .size = sizeof(map),
+ .flags = IOMMU_IOAS_MAP_READABLE |
+ IOMMU_IOAS_MAP_FIXED_IOVA,
+ .ioas_id = ioas_id,
+ .fd = mfd,
+ .start = start,
+ .iova = iova,
+ .length = size,
+ };
+
+ if (cpr_is_incoming()) {
+ return 0;
+ }
+
+ if (!readonly) {
+ map.flags |= IOMMU_IOAS_MAP_WRITEABLE;
+ }
+
+ ret = ioctl(fd, IOMMU_IOAS_MAP_FILE, &map);
+ trace_iommufd_backend_map_file_dma(fd, ioas_id, iova, size, mfd, start,
+ readonly, ret);
+ if (ret) {
+ ret = -errno;
+
+ /* TODO: Not support mapping hardware PCI BAR region for now. */
+ if (errno == EFAULT) {
+ warn_report("IOMMU_IOAS_MAP_FILE failed: %m, PCI BAR?");
+ }
+ }
+ return ret;
+}
+
int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
hwaddr iova, ram_addr_t size)
{
@@ -183,6 +278,10 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
.length = size,
};
+ if (cpr_is_incoming()) {
+ return 0;
+ }
+
ret = ioctl(fd, IOMMU_IOAS_UNMAP, &unmap);
/*
* IOMMUFD takes mapping as some kind of object, unmapping
diff --git a/backends/trace-events b/backends/trace-events
index 7278214..56132d3 100644
--- a/backends/trace-events
+++ b/backends/trace-events
@@ -7,10 +7,12 @@ dbus_vmstate_loading(const char *id) "id: %s"
dbus_vmstate_saving(const char *id) "id: %s"
# iommufd.c
+iommufd_change_process(int fd, bool ret) "fd=%d (%d)"
iommufd_backend_connect(int fd, bool owned, uint32_t users) "fd=%d owned=%d users=%d"
iommufd_backend_disconnect(int fd, uint32_t users) "fd=%d users=%d"
iommu_backend_set_fd(int fd) "pre-opened /dev/iommu fd=%d"
iommufd_backend_map_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, void *vaddr, bool readonly, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" addr=%p readonly=%d (%d)"
+iommufd_backend_map_file_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int fd, unsigned long start, bool readonly, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" fd=%d start=%ld readonly=%d (%d)"
iommufd_backend_unmap_dma_non_exist(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " Unmap nonexistent mapping: iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)"
iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)"
iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas) " iommufd=%d ioas=%d"
diff --git a/bsd-user/main.c b/bsd-user/main.c
index 7c0a059..d0cc8e0 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -474,7 +474,7 @@ int main(int argc, char **argv)
opt_one_insn_per_tb, &error_abort);
object_property_set_int(OBJECT(accel), "tb-size",
opt_tb_size, &error_abort);
- ac->init_machine(NULL);
+ ac->init_machine(accel, NULL);
}
/*
diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst
index 4203713..5a3ed71 100644
--- a/docs/about/deprecated.rst
+++ b/docs/about/deprecated.rst
@@ -315,6 +315,14 @@ deprecated; use the new name ``dtb-randomness`` instead. The new name
better reflects the way this property affects all random data within
the device tree blob, not just the ``kaslr-seed`` node.
+Arm ``ast2700a0-evb`` machine (since 10.1)
+''''''''''''''''''''''''''''''''''''''''''
+
+The ``ast2700a0-evb`` machine represents the first revision of the AST2700
+and serves as the initial engineering sample rather than a production version.
+A newer revision, A1, is now supported, and the ``ast2700a1-evb`` should
+replace the older A0 version.
+
Mips ``mipssim`` machine (since 10.0)
'''''''''''''''''''''''''''''''''''''
diff --git a/docs/devel/migration/CPR.rst b/docs/devel/migration/CPR.rst
index 7897873..0a0fd4f 100644
--- a/docs/devel/migration/CPR.rst
+++ b/docs/devel/migration/CPR.rst
@@ -152,8 +152,7 @@ cpr-transfer mode
This mode allows the user to transfer a guest to a new QEMU instance
on the same host with minimal guest pause time, by preserving guest
RAM in place, albeit with new virtual addresses in new QEMU. Devices
-and their pinned memory pages will also be preserved in a future QEMU
-release.
+and their pinned memory pages are also preserved for VFIO and IOMMUFD.
The user starts new QEMU on the same host as old QEMU, with command-
line arguments to create the same machine, plus the ``-incoming``
@@ -322,6 +321,6 @@ Futures
cpr-transfer mode is based on a capability to transfer open file
descriptors from old to new QEMU. In the future, descriptors for
-vfio, iommufd, vhost, and char devices could be transferred,
+vhost, and char devices could be transferred,
preserving those devices and their kernel state without interruption,
even if they do not explicitly support live migration.
diff --git a/docs/system/arm/aspeed.rst b/docs/system/arm/aspeed.rst
index 43d27d8..bec0a1d 100644
--- a/docs/system/arm/aspeed.rst
+++ b/docs/system/arm/aspeed.rst
@@ -1,5 +1,4 @@
-Aspeed family boards (``ast2500-evb``, ``ast2600-evb``, ``ast2700-evb``, ``bletchley-bmc``, ``fuji-bmc``, ``fby35-bmc``, ``fp5280g2-bmc``, ``g220a-bmc``, ``palmetto-bmc``, ``qcom-dc-scm-v1-bmc``, ``qcom-firework-bmc``, ``quanta-q71l-bmc``, ``rainier-bmc``, ``romulus-bmc``, ``sonorapass-bmc``, ``supermicrox11-bmc``, ``supermicrox11spi-bmc``, ``tiogapass-bmc``, ``witherspoon-bmc``, ``yosemitev2-bmc``)
-=================================================================================================================================================================================================================================================================================================================================================================================================================================
+Aspeed family boards (``ast2500-evb``, ``ast2600-evb``, ``ast2700-evb``, ``bletchley-bmc``, ``fuji-bmc``, ``gb200nvl-bmc``, ``fby35-bmc``, ``fp5280g2-bmc``, ``g220a-bmc``, ``palmetto-bmc``, ``qcom-dc-scm-v1-bmc``, ``qcom-firework-bmc``, ``quanta-q71l-bmc``, ``rainier-bmc``, ``romulus-bmc``, ``sonorapass-bmc``, ``supermicrox11-bmc``, ``supermicrox11spi-bmc``, ``tiogapass-bmc``, ``witherspoon-bmc``, ``yosemitev2-bmc``)
The QEMU Aspeed machines model BMCs of various OpenPOWER systems and
Aspeed evaluation boards. They are based on different releases of the
@@ -35,6 +34,7 @@ AST2600 SoC based machines :
- ``fuji-bmc`` Facebook Fuji BMC
- ``bletchley-bmc`` Facebook Bletchley BMC
- ``fby35-bmc`` Facebook fby35 BMC
+- ``gb200nvl-bmc`` Nvidia GB200nvl BMC
- ``qcom-dc-scm-v1-bmc`` Qualcomm DC-SCM V1 BMC
- ``qcom-firework-bmc`` Qualcomm Firework BMC
diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
index 639a450..d797922 100644
--- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx
@@ -256,20 +256,6 @@ SRST
Show dynamic compiler info.
ERST
-#if defined(CONFIG_TCG)
- {
- .name = "opcount",
- .args_type = "",
- .params = "",
- .help = "show dynamic compiler opcode counters",
- },
-#endif
-
-SRST
- ``info opcount``
- Show dynamic compiler opcode counters
-ERST
-
{
.name = "sync-profile",
.args_type = "mean:-m,no_coalesce:-n,max:i?",
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index f543d94..6ea8653 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -532,6 +532,7 @@ config ASPEED_SOC
select I2C
select DPS310
select PCA9552
+ select PCA9554
select SERIAL_MM
select SMBUS_EEPROM
select PCA954X
diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index d0b3336..c31bbe7 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -19,6 +19,7 @@
#include "hw/i2c/i2c_mux_pca954x.h"
#include "hw/i2c/smbus_eeprom.h"
#include "hw/gpio/pca9552.h"
+#include "hw/gpio/pca9554.h"
#include "hw/nvram/eeprom_at24c.h"
#include "hw/sensor/tmp105.h"
#include "hw/misc/led.h"
@@ -197,9 +198,12 @@ struct AspeedMachineState {
#define FUJI_BMC_HW_STRAP2 0x00000000
/* Bletchley hardware value */
-/* TODO: Leave same as EVB for now. */
-#define BLETCHLEY_BMC_HW_STRAP1 AST2600_EVB_HW_STRAP1
-#define BLETCHLEY_BMC_HW_STRAP2 AST2600_EVB_HW_STRAP2
+#define BLETCHLEY_BMC_HW_STRAP1 0x00002000
+#define BLETCHLEY_BMC_HW_STRAP2 0x00000801
+
+/* GB200NVL hardware value */
+#define GB200NVL_BMC_HW_STRAP1 AST2600_EVB_HW_STRAP1
+#define GB200NVL_BMC_HW_STRAP2 AST2600_EVB_HW_STRAP2
/* Qualcomm DC-SCM hardware value */
#define QCOM_DC_SCM_V1_BMC_HW_STRAP1 0x00000000
@@ -465,6 +469,8 @@ static void aspeed_machine_init(MachineState *machine)
aspeed_board_init_flashes(&bmc->soc->spi[0],
bmc->spi_model ? bmc->spi_model : amc->spi_model,
1, amc->num_cs);
+ aspeed_board_init_flashes(&bmc->soc->spi[1],
+ amc->spi2_model, 1, amc->num_cs2);
}
if (machine->kernel_filename && sc->num_cpus > 1) {
@@ -645,6 +651,12 @@ static void create_pca9552(AspeedSoCState *soc, int bus_id, int addr)
TYPE_PCA9552, addr);
}
+static I2CSlave *create_pca9554(AspeedSoCState *soc, int bus_id, int addr)
+{
+ return i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, bus_id),
+ TYPE_PCA9554, addr);
+}
+
static void sonorapass_bmc_i2c_init(AspeedMachineState *bmc)
{
AspeedSoCState *soc = bmc->soc;
@@ -1003,6 +1015,180 @@ static void fuji_bmc_i2c_init(AspeedMachineState *bmc)
}
#define TYPE_TMP421 "tmp421"
+#define TYPE_DS1338 "ds1338"
+
+/* Catalina hardware value */
+#define CATALINA_BMC_HW_STRAP1 0x00002002
+#define CATALINA_BMC_HW_STRAP2 0x00000800
+
+#define CATALINA_BMC_RAM_SIZE ASPEED_RAM_SIZE(2 * GiB)
+
+static void catalina_bmc_i2c_init(AspeedMachineState *bmc)
+{
+ /* Reference from v6.16-rc2 aspeed-bmc-facebook-catalina.dts */
+
+ AspeedSoCState *soc = bmc->soc;
+ I2CBus *i2c[16] = {};
+ I2CSlave *i2c_mux;
+
+ /* busses 0-15 are all used. */
+ for (int i = 0; i < ARRAY_SIZE(i2c); i++) {
+ i2c[i] = aspeed_i2c_get_bus(&soc->i2c, i);
+ }
+
+ /* &i2c0 */
+ /* i2c-mux@71 (PCA9546) on i2c0 */
+ i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x71);
+
+ /* i2c-mux@72 (PCA9546) on i2c0 */
+ i2c_mux = i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x72);
+
+ /* i2c0mux1ch1 */
+ /* io_expander7 - pca9535@20 */
+ i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 1),
+ TYPE_PCA9552, 0x20);
+ /* eeprom@50 */
+ at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 1), 0x50, 8 * KiB);
+
+ /* i2c-mux@73 (PCA9546) on i2c0 */
+ i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x73);
+
+ /* i2c-mux@75 (PCA9546) on i2c0 */
+ i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x75);
+
+ /* i2c-mux@76 (PCA9546) on i2c0 */
+ i2c_mux = i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x76);
+
+ /* i2c0mux4ch1 */
+ /* io_expander8 - pca9535@21 */
+ i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 1),
+ TYPE_PCA9552, 0x21);
+ /* eeprom@50 */
+ at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 1), 0x50, 8 * KiB);
+
+ /* i2c-mux@77 (PCA9546) on i2c0 */
+ i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x77);
+
+
+ /* &i2c1 */
+ /* i2c-mux@70 (PCA9548) on i2c1 */
+ i2c_mux = i2c_slave_create_simple(i2c[1], TYPE_PCA9548, 0x70);
+ /* i2c1mux0ch0 */
+ /* ina238@41 - no model */
+ /* ina238@42 - no model */
+ /* ina238@44 - no model */
+ /* i2c1mux0ch1 */
+ /* ina238@41 - no model */
+ /* ina238@43 - no model */
+ /* i2c1mux0ch4 */
+ /* ltc4287@42 - no model */
+ /* ltc4287@43 - no model */
+
+ /* i2c1mux0ch5 */
+ /* eeprom@54 */
+ at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 5), 0x54, 8 * KiB);
+ /* tpm75@4f */
+ i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 5), TYPE_TMP75, 0x4f);
+
+ /* i2c1mux0ch6 */
+ /* io_expander5 - pca9554@27 */
+ i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 6),
+ TYPE_PCA9554, 0x27);
+ /* io_expander6 - pca9555@25 */
+ i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 6),
+ TYPE_PCA9552, 0x25);
+ /* eeprom@51 */
+ at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 6), 0x51, 8 * KiB);
+
+ /* i2c1mux0ch7 */
+ /* eeprom@53 */
+ at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 7), 0x53, 8 * KiB);
+ /* temperature-sensor@4b - tmp75 */
+ i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 7), TYPE_TMP75, 0x4b);
+
+ /* &i2c2 */
+ /* io_expander0 - pca9555@20 */
+ i2c_slave_create_simple(i2c[2], TYPE_PCA9552, 0x20);
+ /* io_expander0 - pca9555@21 */
+ i2c_slave_create_simple(i2c[2], TYPE_PCA9552, 0x21);
+ /* io_expander0 - pca9555@27 */
+ i2c_slave_create_simple(i2c[2], TYPE_PCA9552, 0x27);
+ /* eeprom@50 */
+ at24c_eeprom_init(i2c[2], 0x50, 8 * KiB);
+ /* eeprom@51 */
+ at24c_eeprom_init(i2c[2], 0x51, 8 * KiB);
+
+ /* &i2c5 */
+ /* i2c-mux@70 (PCA9548) on i2c5 */
+ i2c_mux = i2c_slave_create_simple(i2c[5], TYPE_PCA9548, 0x70);
+ /* i2c5mux0ch6 */
+ /* eeprom@52 */
+ at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 6), 0x52, 8 * KiB);
+ /* i2c5mux0ch7 */
+ /* ina230@40 - no model */
+ /* ina230@41 - no model */
+ /* ina230@44 - no model */
+ /* ina230@45 - no model */
+
+ /* &i2c6 */
+ /* io_expander3 - pca9555@21 */
+ i2c_slave_create_simple(i2c[6], TYPE_PCA9552, 0x21);
+ /* rtc@6f - nct3018y */
+ i2c_slave_create_simple(i2c[6], TYPE_DS1338, 0x6f);
+
+ /* &i2c9 */
+ /* io_expander4 - pca9555@4f */
+ i2c_slave_create_simple(i2c[9], TYPE_PCA9552, 0x4f);
+ /* temperature-sensor@4b - tpm75 */
+ i2c_slave_create_simple(i2c[9], TYPE_TMP75, 0x4b);
+ /* eeprom@50 */
+ at24c_eeprom_init(i2c[9], 0x50, 8 * KiB);
+ /* eeprom@56 */
+ at24c_eeprom_init(i2c[9], 0x56, 8 * KiB);
+
+ /* &i2c10 */
+ /* temperature-sensor@1f - tpm421 */
+ i2c_slave_create_simple(i2c[10], TYPE_TMP421, 0x1f);
+ /* eeprom@50 */
+ at24c_eeprom_init(i2c[10], 0x50, 8 * KiB);
+
+ /* &i2c11 */
+ /* ssif-bmc@10 - no model */
+
+ /* &i2c12 */
+ /* eeprom@50 */
+ at24c_eeprom_init(i2c[12], 0x50, 8 * KiB);
+
+ /* &i2c13 */
+ /* eeprom@50 */
+ at24c_eeprom_init(i2c[13], 0x50, 8 * KiB);
+ /* eeprom@54 */
+ at24c_eeprom_init(i2c[13], 0x54, 256);
+ /* eeprom@55 */
+ at24c_eeprom_init(i2c[13], 0x55, 256);
+ /* eeprom@57 */
+ at24c_eeprom_init(i2c[13], 0x57, 256);
+
+ /* &i2c14 */
+ /* io_expander9 - pca9555@10 */
+ i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x10);
+ /* io_expander10 - pca9555@11 */
+ i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x11);
+ /* io_expander11 - pca9555@12 */
+ i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x12);
+ /* io_expander12 - pca9555@13 */
+ i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x13);
+ /* io_expander13 - pca9555@14 */
+ i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x14);
+ /* io_expander14 - pca9555@15 */
+ i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x15);
+
+ /* &i2c15 */
+ /* temperature-sensor@1f - tmp421 */
+ i2c_slave_create_simple(i2c[15], TYPE_TMP421, 0x1f);
+ /* eeprom@52 */
+ at24c_eeprom_init(i2c[15], 0x52, 8 * KiB);
+}
static void bletchley_bmc_i2c_init(AspeedMachineState *bmc)
{
@@ -1050,6 +1236,45 @@ static void bletchley_bmc_i2c_init(AspeedMachineState *bmc)
i2c_slave_create_simple(i2c[12], TYPE_PCA9552, 0x67);
}
+
+static void gb200nvl_bmc_i2c_init(AspeedMachineState *bmc)
+{
+ AspeedSoCState *soc = bmc->soc;
+ I2CBus *i2c[15] = {};
+ DeviceState *dev;
+ for (int i = 0; i < sizeof(i2c) / sizeof(i2c[0]); i++) {
+ if ((i == 11) || (i == 12) || (i == 13)) {
+ continue;
+ }
+ i2c[i] = aspeed_i2c_get_bus(&soc->i2c, i);
+ }
+
+ /* Bus 5 Expander */
+ create_pca9554(soc, 4, 0x21);
+
+ /* Mux I2c Expanders */
+ i2c_slave_create_simple(i2c[5], "pca9546", 0x71);
+ i2c_slave_create_simple(i2c[5], "pca9546", 0x72);
+ i2c_slave_create_simple(i2c[5], "pca9546", 0x73);
+ i2c_slave_create_simple(i2c[5], "pca9546", 0x75);
+ i2c_slave_create_simple(i2c[5], "pca9546", 0x76);
+ i2c_slave_create_simple(i2c[5], "pca9546", 0x77);
+
+ /* Bus 10 */
+ dev = DEVICE(create_pca9554(soc, 9, 0x20));
+
+ /* Set FPGA_READY */
+ object_property_set_str(OBJECT(dev), "pin1", "high", &error_fatal);
+
+ create_pca9554(soc, 9, 0x21);
+ at24c_eeprom_init(i2c[9], 0x50, 64 * KiB);
+ at24c_eeprom_init(i2c[9], 0x51, 64 * KiB);
+
+ /* Bus 11 */
+ at24c_eeprom_init_rom(i2c[10], 0x50, 256, gb200nvl_bmc_fruid,
+ gb200nvl_bmc_fruid_len);
+}
+
static void fby35_i2c_init(AspeedMachineState *bmc)
{
AspeedSoCState *soc = bmc->soc;
@@ -1585,6 +1810,52 @@ static void aspeed_machine_bletchley_class_init(ObjectClass *oc,
aspeed_machine_class_init_cpus_defaults(mc);
}
+static void aspeed_machine_catalina_class_init(ObjectClass *oc,
+ const void *data)
+{
+ MachineClass *mc = MACHINE_CLASS(oc);
+ AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc);
+
+ mc->desc = "Facebook Catalina BMC (Cortex-A7)";
+ amc->soc_name = "ast2600-a3";
+ amc->hw_strap1 = CATALINA_BMC_HW_STRAP1;
+ amc->hw_strap2 = CATALINA_BMC_HW_STRAP2;
+ amc->fmc_model = "w25q01jvq";
+ amc->spi_model = NULL;
+ amc->num_cs = 2;
+ amc->macs_mask = ASPEED_MAC2_ON;
+ amc->i2c_init = catalina_bmc_i2c_init;
+ mc->auto_create_sdcard = true;
+ mc->default_ram_size = CATALINA_BMC_RAM_SIZE;
+ aspeed_machine_class_init_cpus_defaults(mc);
+ aspeed_machine_ast2600_class_emmc_init(oc);
+}
+
+#define GB200NVL_BMC_RAM_SIZE ASPEED_RAM_SIZE(1 * GiB)
+
+static void aspeed_machine_gb200nvl_class_init(ObjectClass *oc,
+ const void *data)
+{
+ MachineClass *mc = MACHINE_CLASS(oc);
+ AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc);
+
+ mc->desc = "Nvidia GB200NVL BMC (Cortex-A7)";
+ amc->soc_name = "ast2600-a3";
+ amc->hw_strap1 = GB200NVL_BMC_HW_STRAP1;
+ amc->hw_strap2 = GB200NVL_BMC_HW_STRAP2;
+ amc->fmc_model = "mx66u51235f";
+ amc->spi_model = "mx66u51235f";
+ amc->num_cs = 2;
+
+ amc->spi2_model = "mx66u51235f";
+ amc->num_cs2 = 1;
+ amc->macs_mask = ASPEED_MAC0_ON | ASPEED_MAC1_ON;
+ amc->i2c_init = gb200nvl_bmc_i2c_init;
+ mc->default_ram_size = GB200NVL_BMC_RAM_SIZE;
+ aspeed_machine_class_init_cpus_defaults(mc);
+ aspeed_machine_ast2600_class_emmc_init(oc);
+}
+
static void fby35_reset(MachineState *state, ResetType type)
{
AspeedMachineState *bmc = ASPEED_MACHINE(state);
@@ -1878,6 +2149,14 @@ static const TypeInfo aspeed_machine_types[] = {
.parent = TYPE_ASPEED_MACHINE,
.class_init = aspeed_machine_bletchley_class_init,
}, {
+ .name = MACHINE_TYPE_NAME("gb200nvl-bmc"),
+ .parent = TYPE_ASPEED_MACHINE,
+ .class_init = aspeed_machine_gb200nvl_class_init,
+ }, {
+ .name = MACHINE_TYPE_NAME("catalina-bmc"),
+ .parent = TYPE_ASPEED_MACHINE,
+ .class_init = aspeed_machine_catalina_class_init,
+ }, {
.name = MACHINE_TYPE_NAME("fby35-bmc"),
.parent = MACHINE_TYPE_NAME("ast2600-evb"),
.class_init = aspeed_machine_fby35_class_init,
diff --git a/hw/arm/aspeed_eeprom.c b/hw/arm/aspeed_eeprom.c
index daa3d32..8bbbdec 100644
--- a/hw/arm/aspeed_eeprom.c
+++ b/hw/arm/aspeed_eeprom.c
@@ -162,6 +162,25 @@ const uint8_t rainier_bmc_fruid[] = {
0x31, 0x50, 0x46, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
};
+const uint8_t gb200nvl_bmc_fruid[] = {
+ 0x01, 0x00, 0x00, 0x01, 0x0b, 0x00, 0x00, 0xf3, 0x01, 0x0a, 0x19, 0x1f,
+ 0x0f, 0xe6, 0xc6, 0x4e, 0x56, 0x49, 0x44, 0x49, 0x41, 0xc5, 0x50, 0x33,
+ 0x38, 0x30, 0x39, 0xcd, 0x31, 0x35, 0x38, 0x33, 0x33, 0x32, 0x34, 0x38,
+ 0x30, 0x30, 0x31, 0x35, 0x30, 0xd2, 0x36, 0x39, 0x39, 0x2d, 0x31, 0x33,
+ 0x38, 0x30, 0x39, 0x2d, 0x30, 0x34, 0x30, 0x34, 0x2d, 0x36, 0x30, 0x30,
+ 0xc0, 0x01, 0x01, 0xd6, 0x4d, 0x41, 0x43, 0x3a, 0x20, 0x33, 0x43, 0x3a,
+ 0x36, 0x44, 0x3a, 0x36, 0x36, 0x3a, 0x31, 0x34, 0x3a, 0x43, 0x38, 0x3a,
+ 0x37, 0x41, 0xc1, 0x3b, 0x01, 0x09, 0x19, 0xc6, 0x4e, 0x56, 0x49, 0x44,
+ 0x49, 0x41, 0xc9, 0x50, 0x33, 0x38, 0x30, 0x39, 0x2d, 0x42, 0x4d, 0x43,
+ 0xd2, 0x36, 0x39, 0x39, 0x2d, 0x31, 0x33, 0x38, 0x30, 0x39, 0x2d, 0x30,
+ 0x34, 0x30, 0x34, 0x2d, 0x36, 0x30, 0x30, 0xc4, 0x41, 0x45, 0x2e, 0x31,
+ 0xcd, 0x31, 0x35, 0x38, 0x33, 0x33, 0x32, 0x34, 0x38, 0x30, 0x30, 0x31,
+ 0x35, 0x30, 0xc0, 0xc4, 0x76, 0x30, 0x2e, 0x31, 0xc1, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0xb4, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+
+};
+
const size_t tiogapass_bmc_fruid_len = sizeof(tiogapass_bmc_fruid);
const size_t fby35_nic_fruid_len = sizeof(fby35_nic_fruid);
const size_t fby35_bb_fruid_len = sizeof(fby35_bb_fruid);
@@ -169,3 +188,5 @@ const size_t fby35_bmc_fruid_len = sizeof(fby35_bmc_fruid);
const size_t yosemitev2_bmc_fruid_len = sizeof(yosemitev2_bmc_fruid);
const size_t rainier_bb_fruid_len = sizeof(rainier_bb_fruid);
const size_t rainier_bmc_fruid_len = sizeof(rainier_bmc_fruid);
+const size_t gb200nvl_bmc_fruid_len = sizeof(gb200nvl_bmc_fruid);
+
diff --git a/hw/arm/aspeed_eeprom.h b/hw/arm/aspeed_eeprom.h
index f08c16e..3ed9bc1 100644
--- a/hw/arm/aspeed_eeprom.h
+++ b/hw/arm/aspeed_eeprom.h
@@ -26,4 +26,7 @@ extern const size_t rainier_bb_fruid_len;
extern const uint8_t rainier_bmc_fruid[];
extern const size_t rainier_bmc_fruid_len;
+extern const uint8_t gb200nvl_bmc_fruid[];
+extern const size_t gb200nvl_bmc_fruid_len;
+
#endif
diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig
index eb65bda..14d23e2 100644
--- a/hw/i386/Kconfig
+++ b/hw/i386/Kconfig
@@ -13,7 +13,7 @@ config SGX
config TDX
bool
select X86_FW_OVMF
- depends on KVM
+ depends on KVM && X86_64
config PC
bool
diff --git a/hw/misc/aspeed_scu.c b/hw/misc/aspeed_scu.c
index 4930e00..a0ab5ee 100644
--- a/hw/misc/aspeed_scu.c
+++ b/hw/misc/aspeed_scu.c
@@ -91,6 +91,7 @@
#define BMC_DEV_ID TO_REG(0x1A4)
#define AST2600_PROT_KEY TO_REG(0x00)
+#define AST2600_PROT_KEY2 TO_REG(0x10)
#define AST2600_SILICON_REV TO_REG(0x04)
#define AST2600_SILICON_REV2 TO_REG(0x14)
#define AST2600_SYS_RST_CTRL TO_REG(0x40)
@@ -176,6 +177,7 @@
#define AST2700_SCUIO_UARTCLK_GEN TO_REG(0x330)
#define AST2700_SCUIO_HUARTCLK_GEN TO_REG(0x334)
#define AST2700_SCUIO_CLK_DUTY_MEAS_RST TO_REG(0x388)
+#define AST2700_SCUIO_FREQ_CNT_CTL TO_REG(0x3A0)
#define SCU_IO_REGION_SIZE 0x1000
@@ -722,6 +724,8 @@ static void aspeed_ast2600_scu_write(void *opaque, hwaddr offset,
int reg = TO_REG(offset);
/* Truncate here so bitwise operations below behave as expected */
uint32_t data = data64;
+ bool prot_data_state = data == ASPEED_SCU_PROT_KEY;
+ bool unlocked = s->regs[AST2600_PROT_KEY] && s->regs[AST2600_PROT_KEY2];
if (reg >= ASPEED_AST2600_SCU_NR_REGS) {
qemu_log_mask(LOG_GUEST_ERROR,
@@ -730,15 +734,24 @@ static void aspeed_ast2600_scu_write(void *opaque, hwaddr offset,
return;
}
- if (reg > PROT_KEY && !s->regs[PROT_KEY]) {
+ if ((reg != AST2600_PROT_KEY && reg != AST2600_PROT_KEY2) && !unlocked) {
qemu_log_mask(LOG_GUEST_ERROR, "%s: SCU is locked!\n", __func__);
+ return;
}
trace_aspeed_scu_write(offset, size, data);
switch (reg) {
case AST2600_PROT_KEY:
- s->regs[reg] = (data == ASPEED_SCU_PROT_KEY) ? 1 : 0;
+ /*
+ * Writing a value to SCU000 will modify both protection
+ * registers to each protection register individually.
+ */
+ s->regs[AST2600_PROT_KEY] = prot_data_state;
+ s->regs[AST2600_PROT_KEY2] = prot_data_state;
+ return;
+ case AST2600_PROT_KEY2:
+ s->regs[AST2600_PROT_KEY2] = prot_data_state;
return;
case AST2600_HW_STRAP1:
case AST2600_HW_STRAP2:
@@ -1022,6 +1035,10 @@ static void aspeed_ast2700_scuio_write(void *opaque, hwaddr offset,
s->regs[reg - 1] ^= data;
updated = true;
break;
+ case AST2700_SCUIO_FREQ_CNT_CTL:
+ s->regs[reg] = deposit32(s->regs[reg], 6, 1, !!(data & BIT(1)));
+ updated = true;
+ break;
default:
qemu_log_mask(LOG_GUEST_ERROR,
"%s: Unhandled write at offset 0x%" HWADDR_PRIx "\n",
@@ -1066,6 +1083,7 @@ static const uint32_t ast2700_a0_resets_io[ASPEED_AST2700_SCU_NR_REGS] = {
[AST2700_SCUIO_UARTCLK_GEN] = 0x00014506,
[AST2700_SCUIO_HUARTCLK_GEN] = 0x000145c0,
[AST2700_SCUIO_CLK_DUTY_MEAS_RST] = 0x0c9100d2,
+ [AST2700_SCUIO_FREQ_CNT_CTL] = 0x00000080,
};
static void aspeed_2700_scuio_class_init(ObjectClass *klass, const void *data)
diff --git a/hw/misc/aspeed_sdmc.c b/hw/misc/aspeed_sdmc.c
index f04d993..dff7cc3 100644
--- a/hw/misc/aspeed_sdmc.c
+++ b/hw/misc/aspeed_sdmc.c
@@ -570,6 +570,9 @@ static void aspeed_2700_sdmc_reset(DeviceState *dev)
/* Set ram size bit and defaults values */
s->regs[R_MAIN_CONF] = asc->compute_conf(s, 0);
+ /* Skipping dram init */
+ s->regs[R_MAIN_CONTROL] = BIT(16);
+
if (s->unlocked) {
s->regs[R_2700_PROT] = PROT_UNLOCKED;
}
diff --git a/hw/vfio-user/container.c b/hw/vfio-user/container.c
index 3133fef..d318e6a 100644
--- a/hw/vfio-user/container.c
+++ b/hw/vfio-user/container.c
@@ -13,7 +13,6 @@
#include "hw/vfio-user/container.h"
#include "hw/vfio-user/device.h"
#include "hw/vfio-user/trace.h"
-#include "hw/vfio/vfio-cpr.h"
#include "hw/vfio/vfio-device.h"
#include "hw/vfio/vfio-listener.h"
#include "qapi/error.h"
@@ -225,14 +224,10 @@ vfio_user_container_connect(AddressSpace *as, VFIODevice *vbasedev,
bcontainer = &container->bcontainer;
- if (!vfio_cpr_register_container(bcontainer, errp)) {
- goto free_container_exit;
- }
-
ret = ram_block_uncoordinated_discard_disable(true);
if (ret) {
error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken");
- goto unregister_container_exit;
+ goto free_container_exit;
}
vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
@@ -261,9 +256,6 @@ listener_release_exit:
enable_discards_exit:
ram_block_uncoordinated_discard_disable(false);
-unregister_container_exit:
- vfio_cpr_unregister_container(bcontainer);
-
free_container_exit:
object_unref(container);
@@ -286,7 +278,6 @@ static void vfio_user_container_disconnect(VFIOUserContainer *container)
vioc->release(bcontainer);
}
- vfio_cpr_unregister_container(bcontainer);
object_unref(container);
vfio_address_space_put(space);
diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c
index 1df4438..7719f24 100644
--- a/hw/vfio/ap.c
+++ b/hw/vfio/ap.c
@@ -265,7 +265,7 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp)
error:
error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name);
- g_free(vbasedev->name);
+ vfio_device_free_name(vbasedev);
}
static void vfio_ap_unrealize(DeviceState *dev)
@@ -275,7 +275,7 @@ static void vfio_ap_unrealize(DeviceState *dev)
vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_REQ_IRQ_INDEX);
vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_CFG_CHG_IRQ_INDEX);
vfio_device_detach(&vapdev->vdev);
- g_free(vapdev->vdev.name);
+ vfio_device_free_name(&vapdev->vdev);
}
static const Property vfio_ap_properties[] = {
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index cea9d6e..9560b8d 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -619,7 +619,7 @@ out_io_notifier_err:
out_region_err:
vfio_device_detach(vbasedev);
out_attach_dev_err:
- g_free(vbasedev->name);
+ vfio_device_free_name(vbasedev);
out_unrealize:
if (cdc->unrealize) {
cdc->unrealize(cdev);
@@ -637,7 +637,7 @@ static void vfio_ccw_unrealize(DeviceState *dev)
vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX);
vfio_ccw_put_region(vcdev);
vfio_device_detach(&vcdev->vdev);
- g_free(vcdev->vdev.name);
+ vfio_device_free_name(&vcdev->vdev);
if (cdc->unrealize) {
cdc->unrealize(cdev);
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
index d834bd4..5630497 100644
--- a/hw/vfio/container-base.c
+++ b/hw/vfio/container-base.c
@@ -78,7 +78,16 @@ int vfio_container_dma_map(VFIOContainerBase *bcontainer,
void *vaddr, bool readonly, MemoryRegion *mr)
{
VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+ RAMBlock *rb = mr->ram_block;
+ int mfd = rb ? qemu_ram_get_fd(rb) : -1;
+ if (mfd >= 0 && vioc->dma_map_file) {
+ unsigned long start = vaddr - qemu_ram_get_host_addr(rb);
+ unsigned long offset = qemu_ram_get_fd_offset(rb);
+
+ return vioc->dma_map_file(bcontainer, iova, size, mfd, start + offset,
+ readonly);
+ }
g_assert(vioc->dma_map);
return vioc->dma_map(bcontainer, iova, size, vaddr, readonly, mr);
}
diff --git a/hw/vfio/cpr-iommufd.c b/hw/vfio/cpr-iommufd.c
new file mode 100644
index 0000000..148a06d
--- /dev/null
+++ b/hw/vfio/cpr-iommufd.c
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2024-2025 Oracle and/or its affiliates.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "hw/vfio/vfio-cpr.h"
+#include "hw/vfio/vfio-device.h"
+#include "migration/blocker.h"
+#include "migration/cpr.h"
+#include "migration/migration.h"
+#include "migration/vmstate.h"
+#include "system/iommufd.h"
+#include "vfio-iommufd.h"
+#include "trace.h"
+
+typedef struct CprVFIODevice {
+ char *name;
+ unsigned int namelen;
+ uint32_t ioas_id;
+ int devid;
+ uint32_t hwpt_id;
+ QLIST_ENTRY(CprVFIODevice) next;
+} CprVFIODevice;
+
+static const VMStateDescription vmstate_cpr_vfio_device = {
+ .name = "cpr vfio device",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32(namelen, CprVFIODevice),
+ VMSTATE_VBUFFER_ALLOC_UINT32(name, CprVFIODevice, 0, NULL, namelen),
+ VMSTATE_INT32(devid, CprVFIODevice),
+ VMSTATE_UINT32(ioas_id, CprVFIODevice),
+ VMSTATE_UINT32(hwpt_id, CprVFIODevice),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+const VMStateDescription vmstate_cpr_vfio_devices = {
+ .name = CPR_STATE "/vfio devices",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (const VMStateField[]){
+ VMSTATE_QLIST_V(vfio_devices, CprState, 1, vmstate_cpr_vfio_device,
+ CprVFIODevice, next),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static void vfio_cpr_save_device(VFIODevice *vbasedev)
+{
+ CprVFIODevice *elem = g_new0(CprVFIODevice, 1);
+
+ elem->name = g_strdup(vbasedev->name);
+ elem->namelen = strlen(vbasedev->name) + 1;
+ elem->ioas_id = vbasedev->cpr.ioas_id;
+ elem->devid = vbasedev->devid;
+ elem->hwpt_id = vbasedev->cpr.hwpt_id;
+ QLIST_INSERT_HEAD(&cpr_state.vfio_devices, elem, next);
+}
+
+static CprVFIODevice *find_device(const char *name)
+{
+ CprVFIODeviceList *head = &cpr_state.vfio_devices;
+ CprVFIODevice *elem;
+
+ QLIST_FOREACH(elem, head, next) {
+ if (!strcmp(elem->name, name)) {
+ return elem;
+ }
+ }
+ return NULL;
+}
+
+static void vfio_cpr_delete_device(const char *name)
+{
+ CprVFIODevice *elem = find_device(name);
+
+ if (elem) {
+ QLIST_REMOVE(elem, next);
+ g_free(elem->name);
+ g_free(elem);
+ }
+}
+
+static bool vfio_cpr_find_device(VFIODevice *vbasedev)
+{
+ CprVFIODevice *elem = find_device(vbasedev->name);
+
+ if (elem) {
+ vbasedev->cpr.ioas_id = elem->ioas_id;
+ vbasedev->devid = elem->devid;
+ vbasedev->cpr.hwpt_id = elem->hwpt_id;
+ trace_vfio_cpr_find_device(elem->ioas_id, elem->devid, elem->hwpt_id);
+ return true;
+ }
+ return false;
+}
+
+static bool vfio_cpr_supported(IOMMUFDBackend *be, Error **errp)
+{
+ if (!iommufd_change_process_capable(be)) {
+ if (errp) {
+ error_setg(errp, "vfio iommufd backend does not support "
+ "IOMMU_IOAS_CHANGE_PROCESS");
+ }
+ return false;
+ }
+ return true;
+}
+
+static int iommufd_cpr_pre_save(void *opaque)
+{
+ IOMMUFDBackend *be = opaque;
+
+ /*
+ * The process has not changed yet, but proactively try the ioctl,
+ * and it will fail if any DMA mappings are not supported.
+ */
+ if (!iommufd_change_process_capable(be)) {
+ error_report("some memory regions do not support "
+ "IOMMU_IOAS_CHANGE_PROCESS");
+ return -1;
+ }
+ return 0;
+}
+
+static int iommufd_cpr_post_load(void *opaque, int version_id)
+{
+ IOMMUFDBackend *be = opaque;
+ Error *local_err = NULL;
+
+ if (!iommufd_change_process(be, &local_err)) {
+ error_report_err(local_err);
+ return -1;
+ }
+ return 0;
+}
+
+static const VMStateDescription iommufd_cpr_vmstate = {
+ .name = "iommufd",
+ .version_id = 0,
+ .minimum_version_id = 0,
+ .pre_save = iommufd_cpr_pre_save,
+ .post_load = iommufd_cpr_post_load,
+ .needed = cpr_incoming_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+bool vfio_iommufd_cpr_register_iommufd(IOMMUFDBackend *be, Error **errp)
+{
+ Error **cpr_blocker = &be->cpr_blocker;
+
+ if (!vfio_cpr_supported(be, cpr_blocker)) {
+ return migrate_add_blocker_modes(cpr_blocker, errp,
+ MIG_MODE_CPR_TRANSFER, -1) == 0;
+ }
+
+ vmstate_register(NULL, -1, &iommufd_cpr_vmstate, be);
+
+ return true;
+}
+
+void vfio_iommufd_cpr_unregister_iommufd(IOMMUFDBackend *be)
+{
+ vmstate_unregister(NULL, &iommufd_cpr_vmstate, be);
+ migrate_del_blocker(&be->cpr_blocker);
+}
+
+bool vfio_iommufd_cpr_register_container(VFIOIOMMUFDContainer *container,
+ Error **errp)
+{
+ VFIOContainerBase *bcontainer = &container->bcontainer;
+
+ migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier,
+ vfio_cpr_reboot_notifier,
+ MIG_MODE_CPR_REBOOT);
+
+ vfio_cpr_add_kvm_notifier();
+
+ return true;
+}
+
+void vfio_iommufd_cpr_unregister_container(VFIOIOMMUFDContainer *container)
+{
+ VFIOContainerBase *bcontainer = &container->bcontainer;
+
+ migration_remove_notifier(&bcontainer->cpr_reboot_notifier);
+}
+
+void vfio_iommufd_cpr_register_device(VFIODevice *vbasedev)
+{
+ if (!cpr_is_incoming()) {
+ /*
+ * Beware fd may have already been saved by vfio_device_set_fd,
+ * so call resave to avoid a duplicate entry.
+ */
+ cpr_resave_fd(vbasedev->name, 0, vbasedev->fd);
+ vfio_cpr_save_device(vbasedev);
+ }
+}
+
+void vfio_iommufd_cpr_unregister_device(VFIODevice *vbasedev)
+{
+ cpr_delete_fd(vbasedev->name, 0);
+ vfio_cpr_delete_device(vbasedev->name);
+}
+
+void vfio_cpr_load_device(VFIODevice *vbasedev)
+{
+ if (cpr_is_incoming()) {
+ bool ret = vfio_cpr_find_device(vbasedev);
+ g_assert(ret);
+
+ if (vbasedev->fd < 0) {
+ vbasedev->fd = cpr_find_fd(vbasedev->name, 0);
+ }
+ }
+}
diff --git a/hw/vfio/cpr-legacy.c b/hw/vfio/cpr-legacy.c
index a84c324..553b203 100644
--- a/hw/vfio/cpr-legacy.c
+++ b/hw/vfio/cpr-legacy.c
@@ -99,20 +99,21 @@ static int vfio_container_post_load(void *opaque, int version_id)
{
VFIOContainer *container = opaque;
VFIOContainerBase *bcontainer = &container->bcontainer;
- VFIOGroup *group;
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+ dma_map_fn saved_dma_map = vioc->dma_map;
Error *local_err = NULL;
+ /* During incoming CPR, divert calls to dma_map. */
+ vioc->dma_map = vfio_legacy_cpr_dma_map;
+
if (!vfio_listener_register(bcontainer, &local_err)) {
error_report_err(local_err);
return -1;
}
- QLIST_FOREACH(group, &container->group_list, container_next) {
- VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+ /* Restore original dma_map function */
+ vioc->dma_map = saved_dma_map;
- /* Restore original dma_map function */
- vioc->dma_map = container->cpr.saved_dma_map;
- }
return 0;
}
@@ -148,6 +149,7 @@ static int vfio_cpr_fail_notifier(NotifierWithReturn *notifier,
*/
VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+ dma_map_fn saved_dma_map = vioc->dma_map;
vioc->dma_map = vfio_legacy_cpr_dma_map;
container->cpr.remap_listener = (MemoryListener) {
@@ -158,7 +160,7 @@ static int vfio_cpr_fail_notifier(NotifierWithReturn *notifier,
bcontainer->space->as);
memory_listener_unregister(&container->cpr.remap_listener);
container->cpr.vaddr_unmapped = false;
- vioc->dma_map = container->cpr.saved_dma_map;
+ vioc->dma_map = saved_dma_map;
}
return 0;
}
@@ -177,14 +179,9 @@ bool vfio_legacy_cpr_register_container(VFIOContainer *container, Error **errp)
MIG_MODE_CPR_TRANSFER, -1) == 0;
}
- vmstate_register(NULL, -1, &vfio_container_vmstate, container);
+ vfio_cpr_add_kvm_notifier();
- /* During incoming CPR, divert calls to dma_map. */
- if (cpr_is_incoming()) {
- VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
- container->cpr.saved_dma_map = vioc->dma_map;
- vioc->dma_map = vfio_legacy_cpr_dma_map;
- }
+ vmstate_register(NULL, -1, &vfio_container_vmstate, container);
migration_add_notifier_mode(&container->cpr.transfer_notifier,
vfio_cpr_fail_notifier,
diff --git a/hw/vfio/cpr.c b/hw/vfio/cpr.c
index fdbb58e..af0f12a 100644
--- a/hw/vfio/cpr.c
+++ b/hw/vfio/cpr.c
@@ -9,6 +9,8 @@
#include "hw/vfio/vfio-device.h"
#include "hw/vfio/vfio-cpr.h"
#include "hw/vfio/pci.h"
+#include "hw/pci/msix.h"
+#include "hw/pci/msi.h"
#include "migration/cpr.h"
#include "qapi/error.h"
#include "system/runstate.h"
@@ -27,17 +29,67 @@ int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier,
return 0;
}
-bool vfio_cpr_register_container(VFIOContainerBase *bcontainer, Error **errp)
+#define STRDUP_VECTOR_FD_NAME(vdev, name) \
+ g_strdup_printf("%s_%s", (vdev)->vbasedev.name, (name))
+
+void vfio_cpr_save_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr,
+ int fd)
+{
+ g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name);
+ cpr_save_fd(fdname, nr, fd);
+}
+
+int vfio_cpr_load_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr)
+{
+ g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name);
+ return cpr_find_fd(fdname, nr);
+}
+
+void vfio_cpr_delete_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr)
{
- migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier,
- vfio_cpr_reboot_notifier,
- MIG_MODE_CPR_REBOOT);
- return true;
+ g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name);
+ cpr_delete_fd(fdname, nr);
}
-void vfio_cpr_unregister_container(VFIOContainerBase *bcontainer)
+static void vfio_cpr_claim_vectors(VFIOPCIDevice *vdev, int nr_vectors,
+ bool msix)
{
- migration_remove_notifier(&bcontainer->cpr_reboot_notifier);
+ int i, fd;
+ bool pending = false;
+ PCIDevice *pdev = &vdev->pdev;
+
+ vdev->nr_vectors = nr_vectors;
+ vdev->msi_vectors = g_new0(VFIOMSIVector, nr_vectors);
+ vdev->interrupt = msix ? VFIO_INT_MSIX : VFIO_INT_MSI;
+
+ vfio_pci_prepare_kvm_msi_virq_batch(vdev);
+
+ for (i = 0; i < nr_vectors; i++) {
+ VFIOMSIVector *vector = &vdev->msi_vectors[i];
+
+ fd = vfio_cpr_load_vector_fd(vdev, "interrupt", i);
+ if (fd >= 0) {
+ vfio_pci_vector_init(vdev, i);
+ vfio_pci_msi_set_handler(vdev, i);
+ }
+
+ if (vfio_cpr_load_vector_fd(vdev, "kvm_interrupt", i) >= 0) {
+ vfio_pci_add_kvm_msi_virq(vdev, vector, i, msix);
+ } else {
+ vdev->msi_vectors[i].virq = -1;
+ }
+
+ if (msix && msix_is_pending(pdev, i) && msix_is_masked(pdev, i)) {
+ set_bit(i, vdev->msix->pending);
+ pending = true;
+ }
+ }
+
+ vfio_pci_commit_kvm_msi_virq_batch(vdev);
+
+ if (msix) {
+ memory_region_set_enabled(&pdev->msix_pba_mmio, pending);
+ }
}
/*
@@ -58,13 +110,91 @@ static int vfio_cpr_pci_pre_load(void *opaque)
return 0;
}
+static int vfio_cpr_pci_post_load(void *opaque, int version_id)
+{
+ VFIOPCIDevice *vdev = opaque;
+ PCIDevice *pdev = &vdev->pdev;
+ int nr_vectors;
+
+ if (msix_enabled(pdev)) {
+ vfio_pci_msix_set_notifiers(vdev);
+ nr_vectors = vdev->msix->entries;
+ vfio_cpr_claim_vectors(vdev, nr_vectors, true);
+
+ } else if (msi_enabled(pdev)) {
+ nr_vectors = msi_nr_vectors_allocated(pdev);
+ vfio_cpr_claim_vectors(vdev, nr_vectors, false);
+
+ } else if (vfio_pci_read_config(pdev, PCI_INTERRUPT_PIN, 1)) {
+ Error *local_err = NULL;
+ if (!vfio_pci_intx_enable(vdev, &local_err)) {
+ error_report_err(local_err);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static bool pci_msix_present(void *opaque, int version_id)
+{
+ PCIDevice *pdev = opaque;
+
+ return msix_present(pdev);
+}
+
+static const VMStateDescription vfio_intx_vmstate = {
+ .name = "vfio-cpr-intx",
+ .version_id = 0,
+ .minimum_version_id = 0,
+ .fields = (VMStateField[]) {
+ VMSTATE_BOOL(pending, VFIOINTx),
+ VMSTATE_UINT32(route.mode, VFIOINTx),
+ VMSTATE_INT32(route.irq, VFIOINTx),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+#define VMSTATE_VFIO_INTX(_field, _state) { \
+ .name = (stringify(_field)), \
+ .size = sizeof(VFIOINTx), \
+ .vmsd = &vfio_intx_vmstate, \
+ .flags = VMS_STRUCT, \
+ .offset = vmstate_offset_value(_state, _field, VFIOINTx), \
+}
+
const VMStateDescription vfio_cpr_pci_vmstate = {
.name = "vfio-cpr-pci",
.version_id = 0,
.minimum_version_id = 0,
.pre_load = vfio_cpr_pci_pre_load,
+ .post_load = vfio_cpr_pci_post_load,
.needed = cpr_incoming_needed,
.fields = (VMStateField[]) {
+ VMSTATE_PCI_DEVICE(pdev, VFIOPCIDevice),
+ VMSTATE_MSIX_TEST(pdev, VFIOPCIDevice, pci_msix_present),
+ VMSTATE_VFIO_INTX(intx, VFIOPCIDevice),
VMSTATE_END_OF_LIST()
}
};
+
+static NotifierWithReturn kvm_close_notifier;
+
+static int vfio_cpr_kvm_close_notifier(NotifierWithReturn *notifier,
+ MigrationEvent *e,
+ Error **errp)
+{
+ if (e->type == MIG_EVENT_PRECOPY_DONE) {
+ vfio_kvm_device_close();
+ }
+ return 0;
+}
+
+void vfio_cpr_add_kvm_notifier(void)
+{
+ if (!kvm_close_notifier.notify) {
+ migration_add_notifier_mode(&kvm_close_notifier,
+ vfio_cpr_kvm_close_notifier,
+ MIG_MODE_CPR_TRANSFER);
+ }
+}
diff --git a/hw/vfio/device.c b/hw/vfio/device.c
index d91c695..96cf214 100644
--- a/hw/vfio/device.c
+++ b/hw/vfio/device.c
@@ -28,6 +28,8 @@
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/units.h"
+#include "migration/cpr.h"
+#include "migration/blocker.h"
#include "monitor/monitor.h"
#include "vfio-helpers.h"
@@ -316,28 +318,40 @@ bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
error_setg(errp, "Use FD passing only with iommufd backend");
return false;
}
- /*
- * Give a name with fd so any function printing out vbasedev->name
- * will not break.
- */
if (!vbasedev->name) {
- vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
+
+ if (vbasedev->dev->id) {
+ vbasedev->name = g_strdup(vbasedev->dev->id);
+ return true;
+ } else {
+ /*
+ * Assign a name so any function printing it will not break.
+ * The fd number changes across processes, so this cannot be
+ * used as an invariant name for CPR.
+ */
+ vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
+ error_setg(&vbasedev->cpr.id_blocker,
+ "vfio device with fd=%d needs an id property",
+ vbasedev->fd);
+ return migrate_add_blocker_modes(&vbasedev->cpr.id_blocker,
+ errp, MIG_MODE_CPR_TRANSFER,
+ -1) == 0;
+ }
}
}
return true;
}
-void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
+void vfio_device_free_name(VFIODevice *vbasedev)
{
- ERRP_GUARD();
- int fd = monitor_fd_param(monitor_cur(), str, errp);
+ g_clear_pointer(&vbasedev->name, g_free);
+ migrate_del_blocker(&vbasedev->cpr.id_blocker);
+}
- if (fd < 0) {
- error_prepend(errp, "Could not parse remote object fd %s:", str);
- return;
- }
- vbasedev->fd = fd;
+void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
+{
+ vbasedev->fd = cpr_get_fd_param(vbasedev->dev->id, str, 0, errp);
}
static VFIODeviceIOOps vfio_device_io_ops_ioctl;
diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
index d0dbab1..9a5f621 100644
--- a/hw/vfio/helpers.c
+++ b/hw/vfio/helpers.c
@@ -117,6 +117,17 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
int vfio_kvm_device_fd = -1;
#endif
+void vfio_kvm_device_close(void)
+{
+#ifdef CONFIG_KVM
+ kvm_close();
+ if (vfio_kvm_device_fd != -1) {
+ close(vfio_kvm_device_fd);
+ vfio_kvm_device_fd = -1;
+ }
+#endif
+}
+
int vfio_kvm_device_add_fd(int fd, Error **errp)
{
#ifdef CONFIG_KVM
diff --git a/hw/vfio/iommufd-stubs.c b/hw/vfio/iommufd-stubs.c
new file mode 100644
index 0000000..0be5276
--- /dev/null
+++ b/hw/vfio/iommufd-stubs.c
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2025 Oracle and/or its affiliates.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "migration/cpr.h"
+#include "migration/vmstate.h"
+
+const VMStateDescription vmstate_cpr_vfio_devices = {
+ .name = CPR_STATE "/vfio devices",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (const VMStateField[]){
+ VMSTATE_END_OF_LIST()
+ }
+};
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index d3efef7..48c590b 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -25,6 +25,7 @@
#include "system/reset.h"
#include "qemu/cutils.h"
#include "qemu/chardev_open.h"
+#include "migration/cpr.h"
#include "pci.h"
#include "vfio-iommufd.h"
#include "vfio-helpers.h"
@@ -45,6 +46,18 @@ static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova,
iova, size, vaddr, readonly);
}
+static int iommufd_cdev_map_file(const VFIOContainerBase *bcontainer,
+ hwaddr iova, ram_addr_t size,
+ int fd, unsigned long start, bool readonly)
+{
+ const VFIOIOMMUFDContainer *container =
+ container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
+
+ return iommufd_backend_map_file_dma(container->be,
+ container->ioas_id,
+ iova, size, fd, start, readonly);
+}
+
static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
IOMMUTLBEntry *iotlb, bool unmap_all)
@@ -109,6 +122,10 @@ static bool iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp)
goto err_kvm_device_add;
}
+ if (cpr_is_incoming()) {
+ goto skip_bind;
+ }
+
/* Bind device to iommufd */
bind.iommufd = iommufd->fd;
if (ioctl(vbasedev->fd, VFIO_DEVICE_BIND_IOMMUFD, &bind)) {
@@ -120,6 +137,8 @@ static bool iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp)
vbasedev->devid = bind.out_devid;
trace_iommufd_cdev_connect_and_bind(bind.iommufd, vbasedev->name,
vbasedev->fd, vbasedev->devid);
+
+skip_bind:
return true;
err_bind:
iommufd_cdev_kvm_device_del(vbasedev);
@@ -313,7 +332,14 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
/* Try to find a domain */
QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
- ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
+ if (!cpr_is_incoming()) {
+ ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
+ } else if (vbasedev->cpr.hwpt_id == hwpt->hwpt_id) {
+ ret = 0;
+ } else {
+ continue;
+ }
+
if (ret) {
/* -EINVAL means the domain is incompatible with the device. */
if (ret == -EINVAL) {
@@ -330,6 +356,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
return false;
} else {
vbasedev->hwpt = hwpt;
+ vbasedev->cpr.hwpt_id = hwpt->hwpt_id;
QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt);
return true;
@@ -352,6 +379,11 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
}
+ if (cpr_is_incoming()) {
+ hwpt_id = vbasedev->cpr.hwpt_id;
+ goto skip_alloc;
+ }
+
if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid,
container->ioas_id, flags,
IOMMU_HWPT_DATA_NONE, 0, NULL,
@@ -359,19 +391,20 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
return false;
}
+ ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt_id, errp);
+ if (ret) {
+ iommufd_backend_free_id(container->be, hwpt_id);
+ return false;
+ }
+
+skip_alloc:
hwpt = g_malloc0(sizeof(*hwpt));
hwpt->hwpt_id = hwpt_id;
hwpt->hwpt_flags = flags;
QLIST_INIT(&hwpt->device_list);
- ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
- if (ret) {
- iommufd_backend_free_id(container->be, hwpt->hwpt_id);
- g_free(hwpt);
- return false;
- }
-
vbasedev->hwpt = hwpt;
+ vbasedev->cpr.hwpt_id = hwpt->hwpt_id;
vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt);
QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next);
@@ -409,7 +442,9 @@ static bool iommufd_cdev_attach_container(VFIODevice *vbasedev,
return iommufd_cdev_autodomains_get(vbasedev, container, errp);
}
- return !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp);
+ /* If CPR, we are already attached to ioas_id. */
+ return cpr_is_incoming() ||
+ !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp);
}
static void iommufd_cdev_detach_container(VFIODevice *vbasedev,
@@ -434,7 +469,7 @@ static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container)
if (!QLIST_EMPTY(&bcontainer->device_list)) {
return;
}
- vfio_cpr_unregister_container(bcontainer);
+ vfio_iommufd_cpr_unregister_container(container);
vfio_listener_unregister(bcontainer);
iommufd_backend_free_id(container->be, container->ioas_id);
object_unref(container);
@@ -498,11 +533,14 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
VFIOAddressSpace *space;
struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
int ret, devfd;
+ bool res;
uint32_t ioas_id;
Error *err = NULL;
const VFIOIOMMUClass *iommufd_vioc =
VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
+ vfio_cpr_load_device(vbasedev);
+
if (vbasedev->fd < 0) {
devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
if (devfd < 0) {
@@ -526,7 +564,16 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
vbasedev->iommufd != container->be) {
continue;
}
- if (!iommufd_cdev_attach_container(vbasedev, container, &err)) {
+
+ if (!cpr_is_incoming()) {
+ res = iommufd_cdev_attach_container(vbasedev, container, &err);
+ } else if (vbasedev->cpr.ioas_id == container->ioas_id) {
+ res = true;
+ } else {
+ continue;
+ }
+
+ if (!res) {
const char *msg = error_get_pretty(err);
trace_iommufd_cdev_fail_attach_existing_container(msg);
@@ -543,6 +590,11 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
}
}
+ if (cpr_is_incoming()) {
+ ioas_id = vbasedev->cpr.ioas_id;
+ goto skip_ioas_alloc;
+ }
+
/* Need to allocate a new dedicated container */
if (!iommufd_backend_alloc_ioas(vbasedev->iommufd, &ioas_id, errp)) {
goto err_alloc_ioas;
@@ -550,10 +602,12 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
trace_iommufd_cdev_alloc_ioas(vbasedev->iommufd->fd, ioas_id);
+skip_ioas_alloc:
container = VFIO_IOMMU_IOMMUFD(object_new(TYPE_VFIO_IOMMU_IOMMUFD));
container->be = vbasedev->iommufd;
container->ioas_id = ioas_id;
QLIST_INIT(&container->hwpt_list);
+ vbasedev->cpr.ioas_id = ioas_id;
bcontainer = &container->bcontainer;
vfio_address_space_insert(space, bcontainer);
@@ -580,7 +634,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
goto err_listener_register;
}
- if (!vfio_cpr_register_container(bcontainer, errp)) {
+ if (!vfio_iommufd_cpr_register_container(container, errp)) {
goto err_listener_register;
}
@@ -611,6 +665,7 @@ found_container:
}
vfio_device_prepare(vbasedev, bcontainer, &dev_info);
+ vfio_iommufd_cpr_register_device(vbasedev);
trace_iommufd_cdev_device_info(vbasedev->name, devfd, vbasedev->num_irqs,
vbasedev->num_regions, vbasedev->flags);
@@ -648,6 +703,7 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev)
iommufd_cdev_container_destroy(container);
vfio_address_space_put(space);
+ vfio_iommufd_cpr_unregister_device(vbasedev);
iommufd_cdev_unbind_and_disconnect(vbasedev);
close(vbasedev->fd);
}
@@ -807,6 +863,7 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, const void *data)
VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);
vioc->dma_map = iommufd_cdev_map;
+ vioc->dma_map_file = iommufd_cdev_map_file;
vioc->dma_unmap = iommufd_cdev_unmap;
vioc->attach_device = iommufd_cdev_attach;
vioc->detach_device = iommufd_cdev_detach;
diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build
index 63ea393..bfaf6be 100644
--- a/hw/vfio/meson.build
+++ b/hw/vfio/meson.build
@@ -31,7 +31,9 @@ system_ss.add(when: 'CONFIG_VFIO', if_true: files(
))
system_ss.add(when: ['CONFIG_VFIO', 'CONFIG_IOMMUFD'], if_true: files(
'iommufd.c',
+ 'cpr-iommufd.c',
))
+system_ss.add(when: 'CONFIG_IOMMUFD', if_false: files('iommufd-stubs.c'))
system_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files(
'display.c',
))
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index fa25bde..1093b28 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -29,6 +29,7 @@
#include "hw/pci/pci_bridge.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
+#include "hw/vfio/vfio-cpr.h"
#include "migration/vmstate.h"
#include "migration/cpr.h"
#include "qobject/qdict.h"
@@ -57,20 +58,33 @@ static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
static void vfio_msi_disable_common(VFIOPCIDevice *vdev);
+/* Create new or reuse existing eventfd */
static bool vfio_notifier_init(VFIOPCIDevice *vdev, EventNotifier *e,
const char *name, int nr, Error **errp)
{
- int ret = event_notifier_init(e, 0);
+ int fd, ret;
+ fd = vfio_cpr_load_vector_fd(vdev, name, nr);
+ if (fd >= 0) {
+ event_notifier_init_fd(e, fd);
+ return true;
+ }
+
+ ret = event_notifier_init(e, 0);
if (ret) {
error_setg_errno(errp, -ret, "vfio_notifier_init %s failed", name);
+ return false;
}
- return !ret;
+
+ fd = event_notifier_get_fd(e);
+ vfio_cpr_save_vector_fd(vdev, name, nr, fd);
+ return true;
}
static void vfio_notifier_cleanup(VFIOPCIDevice *vdev, EventNotifier *e,
const char *name, int nr)
{
+ vfio_cpr_delete_vector_fd(vdev, name, nr);
event_notifier_cleanup(e);
}
@@ -196,6 +210,36 @@ fail:
#endif
}
+static bool vfio_cpr_intx_enable_kvm(VFIOPCIDevice *vdev, Error **errp)
+{
+#ifdef CONFIG_KVM
+ if (vdev->no_kvm_intx || !kvm_irqfds_enabled() ||
+ vdev->intx.route.mode != PCI_INTX_ENABLED ||
+ !kvm_resamplefds_enabled()) {
+ return true;
+ }
+
+ if (!vfio_notifier_init(vdev, &vdev->intx.unmask, "intx-unmask", 0, errp)) {
+ return false;
+ }
+
+ if (kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
+ &vdev->intx.interrupt,
+ &vdev->intx.unmask,
+ vdev->intx.route.irq)) {
+ error_setg_errno(errp, errno, "failed to setup resample irqfd");
+ vfio_notifier_cleanup(vdev, &vdev->intx.unmask, "intx-unmask", 0);
+ return false;
+ }
+
+ vdev->intx.kvm_accel = true;
+ trace_vfio_intx_enable_kvm(vdev->vbasedev.name);
+ return true;
+#else
+ return true;
+#endif
+}
+
static void vfio_intx_disable_kvm(VFIOPCIDevice *vdev)
{
#ifdef CONFIG_KVM
@@ -291,7 +335,13 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp)
return true;
}
- vfio_disable_interrupts(vdev);
+ /*
+ * Do not alter interrupt state during vfio_realize and cpr load.
+ * The incoming state is cleared thereafter.
+ */
+ if (!cpr_is_incoming()) {
+ vfio_disable_interrupts(vdev);
+ }
vdev->intx.pin = pin - 1; /* Pin A (1) -> irq[0] */
pci_config_set_interrupt_pin(vdev->pdev.config, pin);
@@ -314,6 +364,14 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp)
fd = event_notifier_get_fd(&vdev->intx.interrupt);
qemu_set_fd_handler(fd, vfio_intx_interrupt, NULL, vdev);
+
+ if (cpr_is_incoming()) {
+ if (!vfio_cpr_intx_enable_kvm(vdev, &err)) {
+ warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
+ }
+ goto skip_signaling;
+ }
+
if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX, 0,
VFIO_IRQ_SET_ACTION_TRIGGER, fd, errp)) {
qemu_set_fd_handler(fd, NULL, NULL, vdev);
@@ -325,6 +383,7 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp)
warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
}
+skip_signaling:
vdev->interrupt = VFIO_INT_INTx;
trace_vfio_intx_enable(vdev->vbasedev.name);
@@ -394,6 +453,14 @@ static void vfio_msi_interrupt(void *opaque)
notify(&vdev->pdev, nr);
}
+void vfio_pci_msi_set_handler(VFIOPCIDevice *vdev, int nr)
+{
+ VFIOMSIVector *vector = &vdev->msi_vectors[nr];
+ int fd = event_notifier_get_fd(&vector->interrupt);
+
+ qemu_set_fd_handler(fd, vfio_msi_interrupt, NULL, vector);
+}
+
/*
* Get MSI-X enabled, but no vector enabled, by setting vector 0 with an invalid
* fd to kernel.
@@ -656,6 +723,15 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
static int vfio_msix_vector_use(PCIDevice *pdev,
unsigned int nr, MSIMessage msg)
{
+ /*
+ * Ignore the callback from msix_set_vector_notifiers during resume.
+ * The necessary subset of these actions is called from
+ * vfio_cpr_claim_vectors during post load.
+ */
+ if (cpr_is_incoming()) {
+ return 0;
+ }
+
return vfio_msix_vector_do_use(pdev, nr, &msg, vfio_msi_interrupt);
}
@@ -686,6 +762,12 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
}
}
+void vfio_pci_msix_set_notifiers(VFIOPCIDevice *vdev)
+{
+ msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use,
+ vfio_msix_vector_release, NULL);
+}
+
void vfio_pci_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev)
{
assert(!vdev->defer_kvm_irq_routing);
@@ -2914,7 +2996,7 @@ void vfio_pci_put_device(VFIOPCIDevice *vdev)
vfio_device_detach(&vdev->vbasedev);
- g_free(vdev->vbasedev.name);
+ vfio_device_free_name(&vdev->vbasedev);
g_free(vdev->msix);
}
@@ -2965,6 +3047,11 @@ void vfio_pci_register_err_notifier(VFIOPCIDevice *vdev)
fd = event_notifier_get_fd(&vdev->err_notifier);
qemu_set_fd_handler(fd, vfio_err_notifier_handler, NULL, vdev);
+ /* Do not alter irq_signaling during vfio_realize for cpr */
+ if (cpr_is_incoming()) {
+ return;
+ }
+
if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_ERR_IRQ_INDEX, 0,
VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) {
error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
@@ -3032,6 +3119,12 @@ void vfio_pci_register_req_notifier(VFIOPCIDevice *vdev)
fd = event_notifier_get_fd(&vdev->req_notifier);
qemu_set_fd_handler(fd, vfio_req_notifier_handler, NULL, vdev);
+ /* Do not alter irq_signaling during vfio_realize for cpr */
+ if (cpr_is_incoming()) {
+ vdev->req_enabled = true;
+ return;
+ }
+
if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_REQ_IRQ_INDEX, 0,
VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) {
error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
@@ -3189,7 +3282,13 @@ bool vfio_pci_interrupt_setup(VFIOPCIDevice *vdev, Error **errp)
vfio_intx_routing_notifier);
vdev->irqchip_change_notifier.notify = vfio_irqchip_change;
kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier);
- if (!vfio_intx_enable(vdev, errp)) {
+
+ /*
+ * During CPR, do not call vfio_intx_enable at this time. Instead,
+ * call it from vfio_pci_post_load after the intx routing data has
+ * been loaded from vmstate.
+ */
+ if (!cpr_is_incoming() && !vfio_intx_enable(vdev, errp)) {
timer_free(vdev->intx.mmap_timer);
pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier);
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 5ba7330..495fae7 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -218,6 +218,8 @@ void vfio_pci_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector,
void vfio_pci_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev);
void vfio_pci_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev);
bool vfio_pci_intx_enable(VFIOPCIDevice *vdev, Error **errp);
+void vfio_pci_msix_set_notifiers(VFIOPCIDevice *vdev);
+void vfio_pci_msi_set_handler(VFIOPCIDevice *vdev, int nr);
uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
void vfio_pci_write_config(PCIDevice *pdev,
diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
index 9a21f2e..5c1795a 100644
--- a/hw/vfio/platform.c
+++ b/hw/vfio/platform.c
@@ -530,7 +530,7 @@ static bool vfio_base_device_init(VFIODevice *vbasedev, Error **errp)
{
/* @fd takes precedence over @sysfsdev which takes precedence over @host */
if (vbasedev->fd < 0 && vbasedev->sysfsdev) {
- g_free(vbasedev->name);
+ vfio_device_free_name(vbasedev);
vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
} else if (vbasedev->fd < 0) {
if (!vbasedev->name || strchr(vbasedev->name, '/')) {
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index e1728c4..8ec0ad0 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -197,6 +197,9 @@ iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new IOMMUFD con
iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d"
iommufd_cdev_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int dev_id) "\t%04x:%02x:%02x.%x devid %d"
+# cpr-iommufd.c
+vfio_cpr_find_device(uint32_t ioas_id, int devid, uint32_t hwpt_id) "ioas_id %u, devid %d, hwpt_id %u"
+
# device.c
vfio_device_get_region_info_type(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%08x"
vfio_device_reset_handler(void) ""
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index a684855..9b658a3 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -85,6 +85,7 @@ void qemu_ram_unset_idstr(RAMBlock *block);
const char *qemu_ram_get_idstr(RAMBlock *rb);
void *qemu_ram_get_host_addr(RAMBlock *rb);
ram_addr_t qemu_ram_get_offset(RAMBlock *rb);
+ram_addr_t qemu_ram_get_fd_offset(RAMBlock *rb);
ram_addr_t qemu_ram_get_used_length(RAMBlock *rb);
ram_addr_t qemu_ram_get_max_length(RAMBlock *rb);
bool qemu_ram_is_shared(RAMBlock *rb);
diff --git a/include/hw/arm/aspeed.h b/include/hw/arm/aspeed.h
index 973277b..6c36455 100644
--- a/include/hw/arm/aspeed.h
+++ b/include/hw/arm/aspeed.h
@@ -35,7 +35,9 @@ struct AspeedMachineClass {
uint32_t hw_strap2;
const char *fmc_model;
const char *spi_model;
+ const char *spi2_model;
uint32_t num_cs;
+ uint32_t num_cs2;
uint32_t macs_mask;
void (*i2c_init)(AspeedMachineState *bmc);
uint32_t uart_default;
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 162a56a..5eaf41a 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -442,6 +442,7 @@ struct qemu_work_item;
* @opaque: User data.
* @mem_io_pc: Host Program Counter at which the memory was accessed.
* @accel: Pointer to accelerator specific state.
+ * @vcpu_dirty: Hardware accelerator is not synchronized with QEMU state
* @kvm_fd: vCPU file descriptor for KVM.
* @work_mutex: Lock to prevent multiple access to @work_list.
* @work_list: List of pending asynchronous work.
@@ -538,7 +539,6 @@ struct CPUState {
uint32_t kvm_fetch_index;
uint64_t dirty_pages;
int kvm_vcpu_stats_fd;
- bool vcpu_dirty;
/* Use by accel-block: CPU is executing an ioctl() */
QemuLockCnt in_ioctl_lock;
@@ -554,6 +554,7 @@ struct CPUState {
uint32_t halted;
int32_t exception_index;
+ bool vcpu_dirty;
AccelCPUState *accel;
/* Used to keep track of an outstanding cpu throttle thread for migration
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
index 3cd86ec..bded6e9 100644
--- a/include/hw/vfio/vfio-container-base.h
+++ b/include/hw/vfio/vfio-container-base.h
@@ -168,6 +168,21 @@ struct VFIOIOMMUClass {
hwaddr iova, ram_addr_t size,
void *vaddr, bool readonly, MemoryRegion *mr);
/**
+ * @dma_map_file
+ *
+ * Map a file range for the container.
+ *
+ * @bcontainer: #VFIOContainerBase to use for map
+ * @iova: start address to map
+ * @size: size of the range to map
+ * @fd: descriptor of the file to map
+ * @start: starting file offset of the range to map
+ * @readonly: map read only if true
+ */
+ int (*dma_map_file)(const VFIOContainerBase *bcontainer,
+ hwaddr iova, ram_addr_t size,
+ int fd, unsigned long start, bool readonly);
+ /**
* @dma_unmap
*
* Unmap an address range from the container.
diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h
index 8bf85b9..80ad20d 100644
--- a/include/hw/vfio/vfio-cpr.h
+++ b/include/hw/vfio/vfio-cpr.h
@@ -15,19 +15,27 @@
struct VFIOContainer;
struct VFIOContainerBase;
struct VFIOGroup;
+struct VFIODevice;
+struct VFIOPCIDevice;
+struct VFIOIOMMUFDContainer;
+struct IOMMUFDBackend;
+
+typedef int (*dma_map_fn)(const struct VFIOContainerBase *bcontainer,
+ hwaddr iova, ram_addr_t size, void *vaddr,
+ bool readonly, MemoryRegion *mr);
typedef struct VFIOContainerCPR {
Error *blocker;
bool vaddr_unmapped;
NotifierWithReturn transfer_notifier;
MemoryListener remap_listener;
- int (*saved_dma_map)(const struct VFIOContainerBase *bcontainer,
- hwaddr iova, ram_addr_t size,
- void *vaddr, bool readonly, MemoryRegion *mr);
} VFIOContainerCPR;
typedef struct VFIODeviceCPR {
Error *mdev_blocker;
+ Error *id_blocker;
+ uint32_t hwpt_id;
+ uint32_t ioas_id;
} VFIODeviceCPR;
bool vfio_legacy_cpr_register_container(struct VFIOContainer *container,
@@ -37,9 +45,15 @@ void vfio_legacy_cpr_unregister_container(struct VFIOContainer *container);
int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier, MigrationEvent *e,
Error **errp);
-bool vfio_cpr_register_container(struct VFIOContainerBase *bcontainer,
- Error **errp);
-void vfio_cpr_unregister_container(struct VFIOContainerBase *bcontainer);
+bool vfio_iommufd_cpr_register_container(struct VFIOIOMMUFDContainer *container,
+ Error **errp);
+void vfio_iommufd_cpr_unregister_container(
+ struct VFIOIOMMUFDContainer *container);
+bool vfio_iommufd_cpr_register_iommufd(struct IOMMUFDBackend *be, Error **errp);
+void vfio_iommufd_cpr_unregister_iommufd(struct IOMMUFDBackend *be);
+void vfio_iommufd_cpr_register_device(struct VFIODevice *vbasedev);
+void vfio_iommufd_cpr_unregister_device(struct VFIODevice *vbasedev);
+void vfio_cpr_load_device(struct VFIODevice *vbasedev);
int vfio_cpr_group_get_device_fd(int d, const char *name);
@@ -52,6 +66,16 @@ void vfio_cpr_giommu_remap(struct VFIOContainerBase *bcontainer,
bool vfio_cpr_ram_discard_register_listener(
struct VFIOContainerBase *bcontainer, MemoryRegionSection *section);
+void vfio_cpr_save_vector_fd(struct VFIOPCIDevice *vdev, const char *name,
+ int nr, int fd);
+int vfio_cpr_load_vector_fd(struct VFIOPCIDevice *vdev, const char *name,
+ int nr);
+void vfio_cpr_delete_vector_fd(struct VFIOPCIDevice *vdev, const char *name,
+ int nr);
+
extern const VMStateDescription vfio_cpr_pci_vmstate;
+extern const VMStateDescription vmstate_cpr_vfio_devices;
+
+void vfio_cpr_add_kvm_notifier(void);
#endif /* HW_VFIO_VFIO_CPR_H */
diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
index c616652..1901a35 100644
--- a/include/hw/vfio/vfio-device.h
+++ b/include/hw/vfio/vfio-device.h
@@ -279,8 +279,11 @@ int vfio_device_get_irq_info(VFIODevice *vbasedev, int index,
/* Returns 0 on success, or a negative errno. */
bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
+void vfio_device_free_name(VFIODevice *vbasedev);
void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp);
void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
DeviceState *dev, bool ram_discard);
int vfio_device_get_aw_bits(VFIODevice *vdev);
+
+void vfio_kvm_device_close(void);
#endif /* HW_VFIO_VFIO_COMMON_H */
diff --git a/include/migration/cpr.h b/include/migration/cpr.h
index 07858e9..3fc19a7 100644
--- a/include/migration/cpr.h
+++ b/include/migration/cpr.h
@@ -9,11 +9,23 @@
#define MIGRATION_CPR_H
#include "qapi/qapi-types-migration.h"
+#include "qemu/queue.h"
#define MIG_MODE_NONE -1
#define QEMU_CPR_FILE_MAGIC 0x51435052
#define QEMU_CPR_FILE_VERSION 0x00000001
+#define CPR_STATE "CprState"
+
+typedef QLIST_HEAD(CprFdList, CprFd) CprFdList;
+typedef QLIST_HEAD(CprVFIODeviceList, CprVFIODevice) CprVFIODeviceList;
+
+typedef struct CprState {
+ CprFdList fds;
+ CprVFIODeviceList vfio_devices;
+} CprState;
+
+extern CprState cpr_state;
void cpr_save_fd(const char *name, int id, int fd);
void cpr_delete_fd(const char *name, int id);
@@ -32,6 +44,8 @@ void cpr_state_close(void);
struct QIOChannel *cpr_state_ioc(void);
bool cpr_incoming_needed(void *opaque);
+int cpr_get_fd_param(const char *name, const char *fdname, int index,
+ Error **errp);
QEMUFile *cpr_transfer_output(MigrationChannel *channel, Error **errp);
QEMUFile *cpr_transfer_input(MigrationChannel *channel, Error **errp);
diff --git a/include/qemu/accel.h b/include/qemu/accel.h
index fbd3d89..1c097ac 100644
--- a/include/qemu/accel.h
+++ b/include/qemu/accel.h
@@ -37,17 +37,20 @@ typedef struct AccelClass {
/*< public >*/
const char *name;
- int (*init_machine)(MachineState *ms);
+ /* Cached by accel_init_ops_interfaces() when created */
+ AccelOpsClass *ops;
+
+ int (*init_machine)(AccelState *as, MachineState *ms);
bool (*cpu_common_realize)(CPUState *cpu, Error **errp);
void (*cpu_common_unrealize)(CPUState *cpu);
/* system related hooks */
- void (*setup_post)(MachineState *ms, AccelState *accel);
- bool (*has_memory)(MachineState *ms, AddressSpace *as,
+ void (*setup_post)(AccelState *as);
+ bool (*has_memory)(AccelState *accel, AddressSpace *as,
hwaddr start_addr, hwaddr size);
/* gdbstub related hooks */
- int (*gdbstub_supported_sstep_flags)(void);
+ int (*gdbstub_supported_sstep_flags)(AccelState *as);
bool *allowed;
/*
diff --git a/include/system/accel-ops.h b/include/system/accel-ops.h
index 4c99d25..a786c7d 100644
--- a/include/system/accel-ops.h
+++ b/include/system/accel-ops.h
@@ -10,6 +10,7 @@
#ifndef ACCEL_OPS_H
#define ACCEL_OPS_H
+#include "qemu/accel.h"
#include "exec/vaddr.h"
#include "qom/object.h"
@@ -31,7 +32,7 @@ struct AccelOpsClass {
/*< public >*/
/* initialization function called when accel is chosen */
- void (*ops_init)(AccelOpsClass *ops);
+ void (*ops_init)(AccelClass *ac);
bool (*cpus_are_resettable)(void);
void (*cpu_reset_hold)(CPUState *cpu);
@@ -40,12 +41,29 @@ struct AccelOpsClass {
void (*kick_vcpu_thread)(CPUState *cpu);
bool (*cpu_thread_is_idle)(CPUState *cpu);
+ /**
+ * synchronize_post_reset:
+ * synchronize_post_init:
+ * @cpu: The vCPU to synchronize.
+ *
+ * Request to synchronize QEMU vCPU registers to the hardware accelerator
+ * (QEMU is the reference).
+ */
void (*synchronize_post_reset)(CPUState *cpu);
void (*synchronize_post_init)(CPUState *cpu);
+ /**
+ * synchronize_state:
+ * synchronize_pre_loadvm:
+ * @cpu: The vCPU to synchronize.
+ *
+ * Request to synchronize QEMU vCPU registers from the hardware accelerator
+ * (the hardware accelerator is the reference).
+ */
void (*synchronize_state)(CPUState *cpu);
void (*synchronize_pre_loadvm)(CPUState *cpu);
void (*synchronize_pre_resume)(bool step_pending);
+ /* handle_interrupt is mandatory. */
void (*handle_interrupt)(CPUState *cpu, int mask);
/**
@@ -70,4 +88,6 @@ struct AccelOpsClass {
void (*remove_all_breakpoints)(CPUState *cpu);
};
+void generic_handle_interrupt(CPUState *cpu, int mask);
+
#endif /* ACCEL_OPS_H */
diff --git a/include/system/cpus.h b/include/system/cpus.h
index 3226c76..69be6a7 100644
--- a/include/system/cpus.h
+++ b/include/system/cpus.h
@@ -7,11 +7,6 @@ void cpus_register_accel(const AccelOpsClass *i);
/* return registers ops */
const AccelOpsClass *cpus_get_accel(void);
-/* accel/dummy-cpus.c */
-
-/* Create a dummy vcpu for AccelOpsClass->create_vcpu_thread */
-void dummy_start_vcpu_thread(CPUState *);
-
/* interface available for cpus accelerator threads */
/* For temporary buffers for forming a name */
diff --git a/include/system/hvf.h b/include/system/hvf.h
index a9a502f..d3dcf08 100644
--- a/include/system/hvf.h
+++ b/include/system/hvf.h
@@ -14,10 +14,6 @@
#define HVF_H
#include "qemu/accel.h"
-#include "qemu/queue.h"
-#include "exec/vaddr.h"
-#include "qom/object.h"
-#include "exec/vaddr.h"
#ifdef COMPILING_PER_TARGET
# ifdef CONFIG_HVF
@@ -40,38 +36,4 @@ typedef struct HVFState HVFState;
DECLARE_INSTANCE_CHECKER(HVFState, HVF_STATE,
TYPE_HVF_ACCEL)
-#ifdef COMPILING_PER_TARGET
-struct hvf_sw_breakpoint {
- vaddr pc;
- vaddr saved_insn;
- int use_count;
- QTAILQ_ENTRY(hvf_sw_breakpoint) entry;
-};
-
-struct hvf_sw_breakpoint *hvf_find_sw_breakpoint(CPUState *cpu,
- vaddr pc);
-int hvf_sw_breakpoints_active(CPUState *cpu);
-
-int hvf_arch_insert_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp);
-int hvf_arch_remove_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp);
-int hvf_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type);
-int hvf_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type);
-void hvf_arch_remove_all_hw_breakpoints(void);
-
-/*
- * hvf_update_guest_debug:
- * @cs: CPUState for the CPU to update
- *
- * Update guest to enable or disable debugging. Per-arch specifics will be
- * handled by calling down to hvf_arch_update_guest_debug.
- */
-int hvf_update_guest_debug(CPUState *cpu);
-void hvf_arch_update_guest_debug(CPUState *cpu);
-
-/*
- * Return whether the guest supports debugging.
- */
-bool hvf_arch_supports_guest_debug(void);
-#endif /* COMPILING_PER_TARGET */
-
#endif
diff --git a/include/system/hvf_int.h b/include/system/hvf_int.h
index d774e58..5150c7d 100644
--- a/include/system/hvf_int.h
+++ b/include/system/hvf_int.h
@@ -12,6 +12,8 @@
#define HVF_INT_H
#include "qemu/queue.h"
+#include "exec/vaddr.h"
+#include "qom/object.h"
#ifdef __aarch64__
#include <Hypervisor/Hypervisor.h>
@@ -60,7 +62,6 @@ struct AccelCPUState {
bool vtimer_masked;
sigset_t unblock_ipi_mask;
bool guest_debug_enabled;
- bool dirty;
};
void assert_hvf_ok_impl(hv_return_t ret, const char *file, unsigned int line,
@@ -77,4 +78,36 @@ int hvf_put_registers(CPUState *);
int hvf_get_registers(CPUState *);
void hvf_kick_vcpu_thread(CPUState *cpu);
+struct hvf_sw_breakpoint {
+ vaddr pc;
+ vaddr saved_insn;
+ int use_count;
+ QTAILQ_ENTRY(hvf_sw_breakpoint) entry;
+};
+
+struct hvf_sw_breakpoint *hvf_find_sw_breakpoint(CPUState *cpu,
+ vaddr pc);
+int hvf_sw_breakpoints_active(CPUState *cpu);
+
+int hvf_arch_insert_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp);
+int hvf_arch_remove_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp);
+int hvf_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type);
+int hvf_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type);
+void hvf_arch_remove_all_hw_breakpoints(void);
+
+/*
+ * hvf_update_guest_debug:
+ * @cs: CPUState for the CPU to update
+ *
+ * Update guest to enable or disable debugging. Per-arch specifics will be
+ * handled by calling down to hvf_arch_update_guest_debug.
+ */
+int hvf_update_guest_debug(CPUState *cpu);
+void hvf_arch_update_guest_debug(CPUState *cpu);
+
+/*
+ * Return whether the guest supports debugging.
+ */
+bool hvf_arch_supports_guest_debug(void);
+
#endif
diff --git a/include/system/hw_accel.h b/include/system/hw_accel.h
index 380e9e6..fa9228d 100644
--- a/include/system/hw_accel.h
+++ b/include/system/hw_accel.h
@@ -17,9 +17,26 @@
#include "system/whpx.h"
#include "system/nvmm.h"
+/**
+ * cpu_synchronize_state:
+ * cpu_synchronize_pre_loadvm:
+ * @cpu: The vCPU to synchronize.
+ *
+ * Request to synchronize QEMU vCPU registers from the hardware accelerator
+ * (the hardware accelerator is the reference).
+ */
void cpu_synchronize_state(CPUState *cpu);
+void cpu_synchronize_pre_loadvm(CPUState *cpu);
+
+/**
+ * cpu_synchronize_post_reset:
+ * cpu_synchronize_post_init:
+ * @cpu: The vCPU to synchronize.
+ *
+ * Request to synchronize QEMU vCPU registers to the hardware accelerator
+ * (QEMU is the reference).
+ */
void cpu_synchronize_post_reset(CPUState *cpu);
void cpu_synchronize_post_init(CPUState *cpu);
-void cpu_synchronize_pre_loadvm(CPUState *cpu);
#endif /* QEMU_HW_ACCEL_H */
diff --git a/include/system/iommufd.h b/include/system/iommufd.h
index 283861b..c9c72ff 100644
--- a/include/system/iommufd.h
+++ b/include/system/iommufd.h
@@ -32,6 +32,7 @@ struct IOMMUFDBackend {
/*< protected >*/
int fd; /* /dev/iommu file descriptor */
bool owned; /* is the /dev/iommu opened internally */
+ Error *cpr_blocker;/* set if be does not support CPR */
uint32_t users;
/*< public >*/
@@ -43,6 +44,9 @@ void iommufd_backend_disconnect(IOMMUFDBackend *be);
bool iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id,
Error **errp);
void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id);
+int iommufd_backend_map_file_dma(IOMMUFDBackend *be, uint32_t ioas_id,
+ hwaddr iova, ram_addr_t size, int fd,
+ unsigned long start, bool readonly);
int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova,
ram_addr_t size, void *vaddr, bool readonly);
int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
@@ -66,6 +70,9 @@ bool iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t id,
uint32_t *entry_num, void *data,
Error **errp);
+bool iommufd_change_process_capable(IOMMUFDBackend *be);
+bool iommufd_change_process(IOMMUFDBackend *be, Error **errp);
+
#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"
OBJECT_DECLARE_TYPE(HostIOMMUDeviceIOMMUFD, HostIOMMUDeviceIOMMUFDClass,
HOST_IOMMU_DEVICE_IOMMUFD)
diff --git a/include/system/kvm.h b/include/system/kvm.h
index 7cc60d2..3c7d314 100644
--- a/include/system/kvm.h
+++ b/include/system/kvm.h
@@ -195,6 +195,7 @@ bool kvm_has_sync_mmu(void);
int kvm_has_vcpu_events(void);
int kvm_max_nested_state_length(void);
int kvm_has_gsi_routing(void);
+void kvm_close(void);
/**
* kvm_arm_supports_user_irq
@@ -317,14 +318,6 @@ int kvm_create_device(KVMState *s, uint64_t type, bool test);
bool kvm_device_supported(int vmfd, uint64_t type);
/**
- * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU
- * @cpu: QOM CPUState object for which KVM vCPU has to be fetched/created.
- *
- * @returns: 0 when success, errno (<0) when failed.
- */
-int kvm_create_vcpu(CPUState *cpu);
-
-/**
* kvm_park_vcpu - Park QEMU KVM vCPU context
* @cpu: QOM CPUState object for which QEMU KVM vCPU context has to be parked.
*
diff --git a/include/system/nvmm.h b/include/system/nvmm.h
index 6971ddb..7390def 100644
--- a/include/system/nvmm.h
+++ b/include/system/nvmm.h
@@ -13,17 +13,18 @@
#define QEMU_NVMM_H
#ifdef COMPILING_PER_TARGET
-
-#ifdef CONFIG_NVMM
-
-int nvmm_enabled(void);
-
-#else /* CONFIG_NVMM */
-
-#define nvmm_enabled() (0)
-
-#endif /* CONFIG_NVMM */
-
+# ifdef CONFIG_NVMM
+# define CONFIG_NVMM_IS_POSSIBLE
+# endif /* !CONFIG_NVMM */
+#else
+# define CONFIG_NVMM_IS_POSSIBLE
#endif /* COMPILING_PER_TARGET */
+#ifdef CONFIG_NVMM_IS_POSSIBLE
+extern bool nvmm_allowed;
+#define nvmm_enabled() (nvmm_allowed)
+#else /* !CONFIG_NVMM_IS_POSSIBLE */
+#define nvmm_enabled() 0
+#endif /* !CONFIG_NVMM_IS_POSSIBLE */
+
#endif /* QEMU_NVMM_H */
diff --git a/include/system/whpx.h b/include/system/whpx.h
index 00ff409..00f6a3e 100644
--- a/include/system/whpx.h
+++ b/include/system/whpx.h
@@ -16,19 +16,20 @@
#define QEMU_WHPX_H
#ifdef COMPILING_PER_TARGET
+# ifdef CONFIG_WHPX
+# define CONFIG_WHPX_IS_POSSIBLE
+# endif /* !CONFIG_WHPX */
+#else
+# define CONFIG_WHPX_IS_POSSIBLE
+#endif /* COMPILING_PER_TARGET */
-#ifdef CONFIG_WHPX
-
-int whpx_enabled(void);
+#ifdef CONFIG_WHPX_IS_POSSIBLE
+extern bool whpx_allowed;
+#define whpx_enabled() (whpx_allowed)
bool whpx_apic_in_platform(void);
-
-#else /* CONFIG_WHPX */
-
-#define whpx_enabled() (0)
+#else /* !CONFIG_WHPX_IS_POSSIBLE */
+#define whpx_enabled() 0
#define whpx_apic_in_platform() (0)
-
-#endif /* CONFIG_WHPX */
-
-#endif /* COMPILING_PER_TARGET */
+#endif /* !CONFIG_WHPX_IS_POSSIBLE */
#endif /* QEMU_WHPX_H */
diff --git a/linux-user/main.c b/linux-user/main.c
index 5ac5b55..a9142ee 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -820,7 +820,7 @@ int main(int argc, char **argv, char **envp)
opt_one_insn_per_tb, &error_abort);
object_property_set_int(OBJECT(accel), "tb-size",
opt_tb_size, &error_abort);
- ac->init_machine(NULL);
+ ac->init_machine(accel, NULL);
}
/*
diff --git a/migration/cpr.c b/migration/cpr.c
index a50a57e..42ad0b0 100644
--- a/migration/cpr.c
+++ b/migration/cpr.c
@@ -7,25 +7,21 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
+#include "hw/vfio/vfio-device.h"
#include "migration/cpr.h"
#include "migration/misc.h"
#include "migration/options.h"
#include "migration/qemu-file.h"
#include "migration/savevm.h"
#include "migration/vmstate.h"
+#include "monitor/monitor.h"
#include "system/runstate.h"
#include "trace.h"
/*************************************************************************/
/* cpr state container for all information to be saved. */
-typedef QLIST_HEAD(CprFdList, CprFd) CprFdList;
-
-typedef struct CprState {
- CprFdList fds;
-} CprState;
-
-static CprState cpr_state;
+CprState cpr_state;
/****************************************************************************/
@@ -126,8 +122,6 @@ int cpr_open_fd(const char *path, int flags, const char *name, int id,
}
/*************************************************************************/
-#define CPR_STATE "CprState"
-
static const VMStateDescription vmstate_cpr_state = {
.name = CPR_STATE,
.version_id = 1,
@@ -135,6 +129,10 @@ static const VMStateDescription vmstate_cpr_state = {
.fields = (VMStateField[]) {
VMSTATE_QLIST_V(fds, CprState, 1, vmstate_cpr_fd, CprFd, next),
VMSTATE_END_OF_LIST()
+ },
+ .subsections = (const VMStateDescription * const []) {
+ &vmstate_cpr_vfio_devices,
+ NULL
}
};
/*************************************************************************/
@@ -264,3 +262,39 @@ bool cpr_incoming_needed(void *opaque)
MigMode mode = migrate_mode();
return mode == MIG_MODE_CPR_TRANSFER;
}
+
+/*
+ * cpr_get_fd_param: find a descriptor and return its value.
+ *
+ * @name: CPR name for the descriptor
+ * @fdname: An integer-valued string, or a name passed to a getfd command
+ * @index: CPR index of the descriptor
+ * @errp: returned error message
+ *
+ * If CPR is not being performed, then use @fdname to find the fd.
+ * If CPR is being performed, then ignore @fdname, and look for @name
+ * and @index in CPR state.
+ *
+ * On success returns the fd value, else returns -1.
+ */
+int cpr_get_fd_param(const char *name, const char *fdname, int index,
+ Error **errp)
+{
+ ERRP_GUARD();
+ int fd;
+
+ if (cpr_is_incoming()) {
+ fd = cpr_find_fd(name, index);
+ if (fd < 0) {
+ error_setg(errp, "cannot find saved value for fd %s", fdname);
+ }
+ } else {
+ fd = monitor_fd_param(monitor_cur(), fdname, errp);
+ if (fd >= 0) {
+ cpr_save_fd(name, index, fd);
+ } else {
+ error_prepend(errp, "Could not parse object fd %s:", fdname);
+ }
+ }
+ return fd;
+}
diff --git a/monitor/hmp-cmds-target.c b/monitor/hmp-cmds-target.c
index 8eaf70d..e982061 100644
--- a/monitor/hmp-cmds-target.c
+++ b/monitor/hmp-cmds-target.c
@@ -102,7 +102,7 @@ void hmp_info_registers(Monitor *mon, const QDict *qdict)
if (all_cpus) {
CPU_FOREACH(cs) {
monitor_printf(mon, "\nCPU#%d\n", cs->cpu_index);
- cpu_dump_state(cs, NULL, CPU_DUMP_FPU);
+ cpu_dump_state(cs, NULL, CPU_DUMP_FPU | CPU_DUMP_VPU);
}
} else {
cs = vcpu >= 0 ? qemu_get_cpu(vcpu) : mon_get_cpu(mon);
@@ -117,7 +117,7 @@ void hmp_info_registers(Monitor *mon, const QDict *qdict)
}
monitor_printf(mon, "\nCPU#%d\n", cs->cpu_index);
- cpu_dump_state(cs, NULL, CPU_DUMP_FPU);
+ cpu_dump_state(cs, NULL, CPU_DUMP_FPU | CPU_DUMP_VPU);
}
}
diff --git a/qapi/machine.json b/qapi/machine.json
index 0650b8d..f712e7d 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -1762,24 +1762,6 @@
'features': [ 'unstable' ] }
##
-# @x-query-opcount:
-#
-# Query TCG opcode counters
-#
-# Features:
-#
-# @unstable: This command is meant for debugging.
-#
-# Returns: TCG opcode counters
-#
-# Since: 6.2
-##
-{ 'command': 'x-query-opcount',
- 'returns': 'HumanReadableText',
- 'if': 'CONFIG_TCG',
- 'features': [ 'unstable' ] }
-
-##
# @x-query-ramblock:
#
# Query system ramblock information
diff --git a/qapi/migration.json b/qapi/migration.json
index 4963f6c..e8a7d3b 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -620,8 +620,10 @@
#
# @cpr-transfer: This mode allows the user to transfer a guest to a
# new QEMU instance on the same host with minimal guest pause
-# time by preserving guest RAM in place. Devices and their pinned
-# pages will also be preserved in a future QEMU release.
+# time by preserving guest RAM in place.
+#
+# Devices and their pinned pages are also preserved for VFIO and
+# IOMMUFD. (since 10.1)
#
# The user starts new QEMU on the same host as old QEMU, with
# command-line arguments to create the same machine, plus the
diff --git a/system/cpus.c b/system/cpus.c
index d16b0df..0d0eec8 100644
--- a/system/cpus.c
+++ b/system/cpus.c
@@ -254,7 +254,7 @@ int64_t cpus_get_elapsed_ticks(void)
return cpu_get_ticks();
}
-static void generic_handle_interrupt(CPUState *cpu, int mask)
+void generic_handle_interrupt(CPUState *cpu, int mask)
{
cpu->interrupt_request |= mask;
@@ -265,11 +265,9 @@ static void generic_handle_interrupt(CPUState *cpu, int mask)
void cpu_interrupt(CPUState *cpu, int mask)
{
- if (cpus_accel->handle_interrupt) {
- cpus_accel->handle_interrupt(cpu, mask);
- } else {
- generic_handle_interrupt(cpu, mask);
- }
+ g_assert(bql_locked());
+
+ cpus_accel->handle_interrupt(cpu, mask);
}
/*
@@ -678,6 +676,8 @@ void cpus_register_accel(const AccelOpsClass *ops)
{
assert(ops != NULL);
assert(ops->create_vcpu_thread != NULL); /* mandatory */
+ assert(ops->handle_interrupt);
+
cpus_accel = ops;
}
diff --git a/system/memory.c b/system/memory.c
index 76b44b8..e8d9b15 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -3501,7 +3501,7 @@ static void mtree_print_flatview(gpointer key, gpointer value,
if (fvi->ac) {
for (i = 0; i < fv_address_spaces->len; ++i) {
as = g_array_index(fv_address_spaces, AddressSpace*, i);
- if (fvi->ac->has_memory(current_machine, as,
+ if (fvi->ac->has_memory(current_machine->accelerator, as,
int128_get64(range->addr.start),
MR_SIZE(range->addr.size) + 1)) {
qemu_printf(" %s", fvi->ac->name);
diff --git a/system/physmem.c b/system/physmem.c
index ff0ca40..130c148 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -1593,6 +1593,11 @@ ram_addr_t qemu_ram_get_offset(RAMBlock *rb)
return rb->offset;
}
+ram_addr_t qemu_ram_get_fd_offset(RAMBlock *rb)
+{
+ return rb->fd_offset;
+}
+
ram_addr_t qemu_ram_get_used_length(RAMBlock *rb)
{
return rb->used_length;
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index 7b6d291..c9cfcdc 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -813,9 +813,9 @@ int hvf_put_registers(CPUState *cpu)
static void flush_cpu_state(CPUState *cpu)
{
- if (cpu->accel->dirty) {
+ if (cpu->vcpu_dirty) {
hvf_put_registers(cpu);
- cpu->accel->dirty = false;
+ cpu->vcpu_dirty = false;
}
}
diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
index 99e37a3..818b504 100644
--- a/target/i386/hvf/hvf.c
+++ b/target/i386/hvf/hvf.c
@@ -733,9 +733,9 @@ int hvf_vcpu_exec(CPUState *cpu)
}
do {
- if (cpu->accel->dirty) {
+ if (cpu->vcpu_dirty) {
hvf_put_registers(cpu);
- cpu->accel->dirty = false;
+ cpu->vcpu_dirty = false;
}
if (hvf_inject_interrupts(cpu)) {
diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c
index 2057314..17fce1d 100644
--- a/target/i386/hvf/x86hvf.c
+++ b/target/i386/hvf/x86hvf.c
@@ -427,7 +427,7 @@ int hvf_process_events(CPUState *cs)
X86CPU *cpu = X86_CPU(cs);
CPUX86State *env = &cpu->env;
- if (!cs->accel->dirty) {
+ if (!cs->vcpu_dirty) {
/* light weight sync for CPU_INTERRUPT_HARD and IF_MASK */
env->eflags = rreg(cs->accel->fd, HV_X86_RFLAGS);
}
diff --git a/target/i386/nvmm/nvmm-accel-ops.c b/target/i386/nvmm/nvmm-accel-ops.c
index 2144307..a5517b0 100644
--- a/target/i386/nvmm/nvmm-accel-ops.c
+++ b/target/i386/nvmm/nvmm-accel-ops.c
@@ -87,6 +87,7 @@ static void nvmm_accel_ops_class_init(ObjectClass *oc, const void *data)
ops->create_vcpu_thread = nvmm_start_vcpu_thread;
ops->kick_vcpu_thread = nvmm_kick_vcpu_thread;
+ ops->handle_interrupt = generic_handle_interrupt;
ops->synchronize_post_reset = nvmm_cpu_synchronize_post_reset;
ops->synchronize_post_init = nvmm_cpu_synchronize_post_init;
diff --git a/target/i386/nvmm/nvmm-all.c b/target/i386/nvmm/nvmm-all.c
index f1c6120..b4a4d50 100644
--- a/target/i386/nvmm/nvmm-all.c
+++ b/target/i386/nvmm/nvmm-all.c
@@ -30,7 +30,6 @@ struct AccelCPUState {
struct nvmm_vcpu vcpu;
uint8_t tpr;
bool stop;
- bool dirty;
/* Window-exiting for INTs/NMIs. */
bool int_window_exit;
@@ -47,7 +46,7 @@ struct qemu_machine {
/* -------------------------------------------------------------------------- */
-static bool nvmm_allowed;
+bool nvmm_allowed;
static struct qemu_machine qemu_mach;
static struct nvmm_machine *
@@ -508,7 +507,7 @@ nvmm_io_callback(struct nvmm_io *io)
}
/* Needed, otherwise infinite loop. */
- current_cpu->accel->dirty = false;
+ current_cpu->vcpu_dirty = false;
}
static void
@@ -517,7 +516,7 @@ nvmm_mem_callback(struct nvmm_mem *mem)
cpu_physical_memory_rw(mem->gpa, mem->data, mem->size, mem->write);
/* Needed, otherwise infinite loop. */
- current_cpu->accel->dirty = false;
+ current_cpu->vcpu_dirty = false;
}
static struct nvmm_assist_callbacks nvmm_callbacks = {
@@ -727,9 +726,9 @@ nvmm_vcpu_loop(CPUState *cpu)
* Inner VCPU loop.
*/
do {
- if (cpu->accel->dirty) {
+ if (cpu->vcpu_dirty) {
nvmm_set_registers(cpu);
- cpu->accel->dirty = false;
+ cpu->vcpu_dirty = false;
}
if (qcpu->stop) {
@@ -827,32 +826,32 @@ static void
do_nvmm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
nvmm_get_registers(cpu);
- cpu->accel->dirty = true;
+ cpu->vcpu_dirty = true;
}
static void
do_nvmm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
{
nvmm_set_registers(cpu);
- cpu->accel->dirty = false;
+ cpu->vcpu_dirty = false;
}
static void
do_nvmm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
{
nvmm_set_registers(cpu);
- cpu->accel->dirty = false;
+ cpu->vcpu_dirty = false;
}
static void
do_nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
{
- cpu->accel->dirty = true;
+ cpu->vcpu_dirty = true;
}
void nvmm_cpu_synchronize_state(CPUState *cpu)
{
- if (!cpu->accel->dirty) {
+ if (!cpu->vcpu_dirty) {
run_on_cpu(cpu, do_nvmm_cpu_synchronize_state, RUN_ON_CPU_NULL);
}
}
@@ -982,7 +981,7 @@ nvmm_init_vcpu(CPUState *cpu)
}
}
- qcpu->dirty = true;
+ qcpu->vcpu_dirty = true;
cpu->accel = qcpu;
return 0;
@@ -1153,7 +1152,7 @@ static struct RAMBlockNotifier nvmm_ram_notifier = {
/* -------------------------------------------------------------------------- */
static int
-nvmm_accel_init(MachineState *ms)
+nvmm_accel_init(AccelState *as, MachineState *ms)
{
int ret, err;
@@ -1193,12 +1192,6 @@ nvmm_accel_init(MachineState *ms)
return 0;
}
-int
-nvmm_enabled(void)
-{
- return nvmm_allowed;
-}
-
static void
nvmm_accel_class_init(ObjectClass *oc, const void *data)
{
diff --git a/target/i386/whpx/whpx-accel-ops.c b/target/i386/whpx/whpx-accel-ops.c
index b8bebe4..31cf15f 100644
--- a/target/i386/whpx/whpx-accel-ops.c
+++ b/target/i386/whpx/whpx-accel-ops.c
@@ -90,6 +90,7 @@ static void whpx_accel_ops_class_init(ObjectClass *oc, const void *data)
ops->create_vcpu_thread = whpx_start_vcpu_thread;
ops->kick_vcpu_thread = whpx_kick_vcpu_thread;
ops->cpu_thread_is_idle = whpx_vcpu_thread_is_idle;
+ ops->handle_interrupt = generic_handle_interrupt;
ops->synchronize_post_reset = whpx_cpu_synchronize_post_reset;
ops->synchronize_post_init = whpx_cpu_synchronize_post_init;
diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c
index cf6d3e4..721c478 100644
--- a/target/i386/whpx/whpx-all.c
+++ b/target/i386/whpx/whpx-all.c
@@ -237,13 +237,12 @@ struct AccelCPUState {
uint64_t tpr;
uint64_t apic_base;
bool interruption_pending;
- bool dirty;
/* Must be the last field as it may have a tail */
WHV_RUN_VP_EXIT_CONTEXT exit_ctx;
};
-static bool whpx_allowed;
+bool whpx_allowed;
static bool whp_dispatch_initialized;
static HMODULE hWinHvPlatform, hWinHvEmulation;
static uint32_t max_vcpu_index;
@@ -836,7 +835,7 @@ static HRESULT CALLBACK whpx_emu_setreg_callback(
* The emulator just successfully wrote the register state. We clear the
* dirty state so we avoid the double write on resume of the VP.
*/
- cpu->accel->dirty = false;
+ cpu->vcpu_dirty = false;
return hr;
}
@@ -1391,7 +1390,7 @@ static int whpx_last_vcpu_stopping(CPUState *cpu)
/* Returns the address of the next instruction that is about to be executed. */
static vaddr whpx_vcpu_get_pc(CPUState *cpu, bool exit_context_valid)
{
- if (cpu->accel->dirty) {
+ if (cpu->vcpu_dirty) {
/* The CPU registers have been modified by other parts of QEMU. */
return cpu_env(cpu)->eip;
} else if (exit_context_valid) {
@@ -1704,9 +1703,9 @@ static int whpx_vcpu_run(CPUState *cpu)
}
do {
- if (cpu->accel->dirty) {
+ if (cpu->vcpu_dirty) {
whpx_set_registers(cpu, WHPX_SET_RUNTIME_STATE);
- cpu->accel->dirty = false;
+ cpu->vcpu_dirty = false;
}
if (exclusive_step_mode == WHPX_STEP_NONE) {
@@ -2054,9 +2053,9 @@ static int whpx_vcpu_run(CPUState *cpu)
static void do_whpx_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
- if (!cpu->accel->dirty) {
+ if (!cpu->vcpu_dirty) {
whpx_get_registers(cpu);
- cpu->accel->dirty = true;
+ cpu->vcpu_dirty = true;
}
}
@@ -2064,20 +2063,20 @@ static void do_whpx_cpu_synchronize_post_reset(CPUState *cpu,
run_on_cpu_data arg)
{
whpx_set_registers(cpu, WHPX_SET_RESET_STATE);
- cpu->accel->dirty = false;
+ cpu->vcpu_dirty = false;
}
static void do_whpx_cpu_synchronize_post_init(CPUState *cpu,
run_on_cpu_data arg)
{
whpx_set_registers(cpu, WHPX_SET_FULL_STATE);
- cpu->accel->dirty = false;
+ cpu->vcpu_dirty = false;
}
static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu,
run_on_cpu_data arg)
{
- cpu->accel->dirty = true;
+ cpu->vcpu_dirty = true;
}
/*
@@ -2086,7 +2085,7 @@ static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu,
void whpx_cpu_synchronize_state(CPUState *cpu)
{
- if (!cpu->accel->dirty) {
+ if (!cpu->vcpu_dirty) {
run_on_cpu(cpu, do_whpx_cpu_synchronize_state, RUN_ON_CPU_NULL);
}
}
@@ -2226,7 +2225,7 @@ int whpx_init_vcpu(CPUState *cpu)
}
vcpu->interruptable = true;
- vcpu->dirty = true;
+ cpu->vcpu_dirty = true;
cpu->accel = vcpu;
max_vcpu_index = max(max_vcpu_index, cpu->cpu_index);
qemu_add_vm_change_state_handler(whpx_cpu_update_state, env);
@@ -2505,7 +2504,7 @@ static void whpx_set_kernel_irqchip(Object *obj, Visitor *v,
* Partition support
*/
-static int whpx_accel_init(MachineState *ms)
+static int whpx_accel_init(AccelState *as, MachineState *ms)
{
struct whpx_state *whpx;
int ret;
@@ -2689,11 +2688,6 @@ error:
return ret;
}
-int whpx_enabled(void)
-{
- return whpx_allowed;
-}
-
bool whpx_apic_in_platform(void) {
return whpx_global.apic_in_platform;
}
diff --git a/tests/functional/aspeed.py b/tests/functional/aspeed.py
index 7a40d5d..b131703 100644
--- a/tests/functional/aspeed.py
+++ b/tests/functional/aspeed.py
@@ -8,8 +8,13 @@ from qemu_test import LinuxKernelTest
class AspeedTest(LinuxKernelTest):
def do_test_arm_aspeed_openbmc(self, machine, image, uboot='2019.04',
- cpu_id='0x0', soc='AST2500 rev A1'):
- hostname = machine.removesuffix('-bmc')
+ cpu_id='0x0', soc='AST2500 rev A1',
+ image_hostname=None):
+ # Allow for the image hostname to not end in "-bmc"
+ if image_hostname is not None:
+ hostname = image_hostname
+ else:
+ hostname = machine.removesuffix('-bmc')
self.set_machine(machine)
self.vm.set_console()
diff --git a/tests/functional/meson.build b/tests/functional/meson.build
index b542b3a..050c900 100644
--- a/tests/functional/meson.build
+++ b/tests/functional/meson.build
@@ -32,6 +32,8 @@ test_timeouts = {
'arm_aspeed_ast2500' : 720,
'arm_aspeed_ast2600' : 1200,
'arm_aspeed_bletchley' : 480,
+ 'arm_aspeed_catalina' : 480,
+ 'arm_aspeed_gb200nvl_bmc' : 480,
'arm_aspeed_rainier' : 480,
'arm_bpim2u' : 500,
'arm_collie' : 180,
@@ -127,6 +129,8 @@ tests_arm_system_thorough = [
'arm_aspeed_ast2500',
'arm_aspeed_ast2600',
'arm_aspeed_bletchley',
+ 'arm_aspeed_catalina',
+ 'arm_aspeed_gb200nvl_bmc',
'arm_aspeed_rainier',
'arm_bpim2u',
'arm_canona1100',
diff --git a/tests/functional/test_arm_aspeed_catalina.py b/tests/functional/test_arm_aspeed_catalina.py
new file mode 100755
index 0000000..dc2f24e
--- /dev/null
+++ b/tests/functional/test_arm_aspeed_catalina.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python3
+#
+# Functional test that boots the ASPEED machines
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+from qemu_test import Asset
+from aspeed import AspeedTest
+
+
+class CatalinaMachine(AspeedTest):
+
+ ASSET_CATALINA_FLASH = Asset(
+ 'https://github.com/legoater/qemu-aspeed-boot/raw/a866feb5ef81245b4827a214584bf6bcc72939f6/images/catalina-bmc/obmc-phosphor-image-catalina-20250619123021.static.mtd.xz',
+ '287402e1ba021991e06be1d098f509444a02a3d81a73a932f66528b159e864f9')
+
+ def test_arm_ast2600_catalina_openbmc(self):
+ image_path = self.uncompress(self.ASSET_CATALINA_FLASH)
+
+ self.do_test_arm_aspeed_openbmc('catalina-bmc', image=image_path,
+ uboot='2019.04', cpu_id='0xf00',
+ soc='AST2600 rev A3')
+
+if __name__ == '__main__':
+ AspeedTest.main()
diff --git a/tests/functional/test_arm_aspeed_gb200nvl_bmc.py b/tests/functional/test_arm_aspeed_gb200nvl_bmc.py
new file mode 100644
index 0000000..8e8e3f0
--- /dev/null
+++ b/tests/functional/test_arm_aspeed_gb200nvl_bmc.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python3
+#
+# Functional test that boots the ASPEED machines
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+from qemu_test import Asset
+from aspeed import AspeedTest
+
+
+class GB200Machine(AspeedTest):
+
+ ASSET_GB200_FLASH = Asset(
+ 'https://github.com/legoater/qemu-aspeed-boot/raw/refs/heads/master/images/gb200nvl-obmc/obmc-phosphor-image-gb200nvl-obmc-20250702182348.static.mtd.xz',
+ 'b84819317cb3dc762895ad507705978ef000bfc77c50c33a63bdd37921db0dbc')
+
+ def test_arm_aspeed_gb200_openbmc(self):
+ image_path = self.uncompress(self.ASSET_GB200_FLASH)
+
+ self.do_test_arm_aspeed_openbmc('gb200nvl-bmc', image=image_path,
+ uboot='2019.04', cpu_id='0xf00',
+ soc='AST2600 rev A3',
+ image_hostname='gb200nvl-obmc')
+
+if __name__ == '__main__':
+ AspeedTest.main()
diff --git a/tests/qtest/aspeed_scu-test.c b/tests/qtest/aspeed_scu-test.c
new file mode 100644
index 0000000..ca09f91
--- /dev/null
+++ b/tests/qtest/aspeed_scu-test.c
@@ -0,0 +1,231 @@
+/*
+ * QTest testcase for the ASPEED AST2500 and AST2600 SCU.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright (C) 2025 Tan Siewert
+ */
+
+#include "qemu/osdep.h"
+#include "libqtest-single.h"
+
+/*
+ * SCU base, as well as protection key are
+ * the same on AST2500 and 2600.
+ */
+#define AST_SCU_BASE 0x1E6E2000
+#define AST_SCU_PROT_LOCK_STATE 0x0
+#define AST_SCU_PROT_LOCK_VALUE 0x2
+#define AST_SCU_PROT_UNLOCK_STATE 0x1
+#define AST_SCU_PROT_UNLOCK_VALUE 0x1688A8A8
+
+#define AST2500_MACHINE "-machine ast2500-evb"
+#define AST2500_SCU_PROT_REG 0x00
+#define AST2500_SCU_MISC_2_CONTROL_REG 0x4C
+
+#define AST2600_MACHINE "-machine ast2600-evb"
+/* AST2600 has two protection registers */
+#define AST2600_SCU_PROT_REG 0x000
+#define AST2600_SCU_PROT_REG2 0x010
+#define AST2600_SCU_MISC_2_CONTROL_REG 0x0C4
+
+#define TEST_LOCK_ARBITRARY_VALUE 0xABCDEFAB
+
+/**
+ * Assert that a given register matches an expected value.
+ *
+ * Reads the register and checks if its value equals the expected value.
+ *
+ * @param *s - QTest machine state
+ * @param reg - Address of the register to be checked
+ * @param expected - Expected register value
+ */
+static inline void assert_register_eq(QTestState *s,
+ uint32_t reg,
+ uint32_t expected)
+{
+ uint32_t value = qtest_readl(s, reg);
+ g_assert_cmphex(value, ==, expected);
+}
+
+/**
+ * Assert that a given register does not match a specific value.
+ *
+ * Reads the register and checks that its value is not equal to the
+ * provided value.
+ *
+ * @param *s - QTest machine state
+ * @param reg - Address of the register to be checked
+ * @param not_expected - Value the register must not contain
+ */
+static inline void assert_register_neq(QTestState *s,
+ uint32_t reg,
+ uint32_t not_expected)
+{
+ uint32_t value = qtest_readl(s, reg);
+ g_assert_cmphex(value, !=, not_expected);
+}
+
+/**
+ * Test whether the SCU can be locked and unlocked correctly.
+ *
+ * When testing multiple registers, this function assumes that writing
+ * to the first register also affects the others. However, writing to
+ * any other register only affects itself.
+ *
+ * @param *machine - input machine configuration, passed directly
+ * to QTest
+ * @param regs[] - List of registers to be checked
+ * @param regc - amount of arguments for registers to be checked
+ */
+static void test_protection_register(const char *machine,
+ const uint32_t regs[],
+ const int regc)
+{
+ QTestState *s = qtest_init(machine);
+
+ for (int i = 0; i < regc; i++) {
+ uint32_t reg = regs[i];
+
+ qtest_writel(s, reg, AST_SCU_PROT_UNLOCK_VALUE);
+ assert_register_eq(s, reg, AST_SCU_PROT_UNLOCK_STATE);
+
+ /**
+ * Check that other registers are unlocked too, if more
+ * than one is available.
+ */
+ if (regc > 1 && i == 0) {
+ /* Initialise at 1 instead of 0 to skip first */
+ for (int j = 1; j < regc; j++) {
+ uint32_t add_reg = regs[j];
+ assert_register_eq(s, add_reg, AST_SCU_PROT_UNLOCK_STATE);
+ }
+ }
+
+ /* Lock the register again */
+ qtest_writel(s, reg, AST_SCU_PROT_LOCK_VALUE);
+ assert_register_eq(s, reg, AST_SCU_PROT_LOCK_STATE);
+
+ /* And the same for locked state */
+ if (regc > 1 && i == 0) {
+ /* Initialise at 1 instead of 0 to skip first */
+ for (int j = 1; j < regc; j++) {
+ uint32_t add_reg = regs[j];
+ assert_register_eq(s, add_reg, AST_SCU_PROT_LOCK_STATE);
+ }
+ }
+ }
+
+ qtest_quit(s);
+}
+
+static void test_2500_protection_register(void)
+{
+ uint32_t regs[] = { AST_SCU_BASE + AST2500_SCU_PROT_REG };
+
+ test_protection_register(AST2500_MACHINE,
+ regs,
+ ARRAY_SIZE(regs));
+}
+
+static void test_2600_protection_register(void)
+{
+ /**
+ * The AST2600 has two protection registers, both
+ * being required to be unlocked to do any operation.
+ *
+ * Modifying SCU000 also modifies SCU010, but modifying
+ * SCU010 only will keep SCU000 untouched.
+ */
+ uint32_t regs[] = { AST_SCU_BASE + AST2600_SCU_PROT_REG,
+ AST_SCU_BASE + AST2600_SCU_PROT_REG2 };
+
+ test_protection_register(AST2600_MACHINE,
+ regs,
+ ARRAY_SIZE(regs));
+}
+
+/**
+ * Test if SCU register writes are correctly allowed or blocked
+ * depending on the protection register state.
+ *
+ * The test first locks the protection register and verifies that
+ * writes to the target SCU register are rejected. It then unlocks
+ * the protection register and confirms that the written value is
+ * retained when unlocked.
+ *
+ * @param *machine - input machine configuration, passed directly
+ * to QTest
+ * @param protection_register - first SCU protection key register
+ * (only one for keeping it simple)
+ * @param test_register - Register to be used for writing arbitrary
+ * values
+ */
+static void test_write_permission_lock_state(const char *machine,
+ const uint32_t protection_register,
+ const uint32_t test_register)
+{
+ QTestState *s = qtest_init(machine);
+
+ /* Arbitrary value to lock provided SCU protection register */
+ qtest_writel(s, protection_register, AST_SCU_PROT_LOCK_VALUE);
+
+ /* Ensure that the SCU is really locked */
+ assert_register_eq(s, protection_register, AST_SCU_PROT_LOCK_STATE);
+
+ /* Write a known arbitrary value to test that the write is blocked */
+ qtest_writel(s, test_register, TEST_LOCK_ARBITRARY_VALUE);
+
+ /* We do not want to have the written value to be saved */
+ assert_register_neq(s, test_register, TEST_LOCK_ARBITRARY_VALUE);
+
+ /**
+ * Unlock the SCU and verify that it can be written to.
+ * Assumes that the first SCU protection register is sufficient to
+ * unlock all protection registers, if multiple are present.
+ */
+ qtest_writel(s, protection_register, AST_SCU_PROT_UNLOCK_VALUE);
+ assert_register_eq(s, protection_register, AST_SCU_PROT_UNLOCK_STATE);
+
+ /* Write a known arbitrary value to test that the write works */
+ qtest_writel(s, test_register, TEST_LOCK_ARBITRARY_VALUE);
+
+ /* Ensure that the written value is retained */
+ assert_register_eq(s, test_register, TEST_LOCK_ARBITRARY_VALUE);
+
+ qtest_quit(s);
+}
+
+static void test_2500_write_permission_lock_state(void)
+{
+ test_write_permission_lock_state(
+ AST2500_MACHINE,
+ AST_SCU_BASE + AST2500_SCU_PROT_REG,
+ AST_SCU_BASE + AST2500_SCU_MISC_2_CONTROL_REG
+ );
+}
+
+static void test_2600_write_permission_lock_state(void)
+{
+ test_write_permission_lock_state(
+ AST2600_MACHINE,
+ AST_SCU_BASE + AST2600_SCU_PROT_REG,
+ AST_SCU_BASE + AST2600_SCU_MISC_2_CONTROL_REG
+ );
+}
+
+int main(int argc, char **argv)
+{
+ g_test_init(&argc, &argv, NULL);
+
+ qtest_add_func("/ast2500/scu/protection_register",
+ test_2500_protection_register);
+ qtest_add_func("/ast2600/scu/protection_register",
+ test_2600_protection_register);
+
+ qtest_add_func("/ast2500/scu/write_permission_lock_state",
+ test_2500_write_permission_lock_state);
+ qtest_add_func("/ast2600/scu/write_permission_lock_state",
+ test_2600_write_permission_lock_state);
+
+ return g_test_run();
+}
diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index 8ad8490..91b4a71 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -215,6 +215,7 @@ qtests_npcm8xx = \
qtests_aspeed = \
['aspeed_gpio-test',
'aspeed_hace-test',
+ 'aspeed_scu-test',
'aspeed_smc-test']
qtests_aspeed64 = \
['ast2700-gpio-test',
diff --git a/tests/qtest/qmp-cmd-test.c b/tests/qtest/qmp-cmd-test.c
index 040d042..cf71876 100644
--- a/tests/qtest/qmp-cmd-test.c
+++ b/tests/qtest/qmp-cmd-test.c
@@ -51,7 +51,6 @@ static int query_error_class(const char *cmd)
{ "x-query-usb", ERROR_CLASS_GENERIC_ERROR },
/* Only valid with accel=tcg */
{ "x-query-jit", ERROR_CLASS_GENERIC_ERROR },
- { "x-query-opcount", ERROR_CLASS_GENERIC_ERROR },
{ "xen-event-list", ERROR_CLASS_GENERIC_ERROR },
{ NULL, -1 }
};