diff options
90 files changed, 2063 insertions, 624 deletions
@@ -11,4 +11,3 @@ prep-perpatch-check-cmd = scripts/checkpatch.pl -q --terse --no-summary --mailback - searchmask = https://lore.kernel.org/qemu-devel/?x=m&t=1&q=%s linkmask = https://lore.kernel.org/qemu-devel/%s - linktrailermask = Message-ID: <%s> diff --git a/MAINTAINERS b/MAINTAINERS index 12efc88..1842c3d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -495,6 +495,7 @@ Guest CPU Cores (other accelerators) Overall M: Richard Henderson <richard.henderson@linaro.org> R: Paolo Bonzini <pbonzini@redhat.com> +R: Philippe Mathieu-Daudé <philmd@linaro.org> S: Maintained F: include/exec/cpu*.h F: include/exec/target_long.h @@ -503,6 +504,7 @@ F: include/system/accel-*.h F: include/system/cpus.h F: include/accel/accel-cpu*.h F: accel/accel-*.? +F: accel/dummy-cpus.? F: accel/Makefile.objs F: accel/stubs/Makefile.objs F: cpu-common.c @@ -540,6 +542,7 @@ WHPX CPUs M: Sunil Muthuswamy <sunilmut@microsoft.com> S: Supported F: target/i386/whpx/ +F: accel/stubs/whpx-stub.c F: include/system/whpx.h X86 Instruction Emulator @@ -586,6 +589,7 @@ NetBSD Virtual Machine Monitor (NVMM) CPU support M: Reinoud Zandijk <reinoud@netbsd.org> S: Maintained F: include/system/nvmm.h +F: accel/stubs/nvmm-stub.c F: target/i386/nvmm/ Hosts @@ -227,6 +227,7 @@ distclean: clean recurse-distclean rm -Rf .sdk qemu-bundle find-src-path = find "$(SRC_PATH)" -path "$(SRC_PATH)/meson" -prune -o \ + -path "$(SRC_PATH)/.pc" -prune -o \ -type l -prune -o \( -name "*.[chsS]" -o -name "*.[ch].inc" \) .PHONY: ctags diff --git a/accel/accel-common.c b/accel/accel-common.c index 4894b98..591ff4c 100644 --- a/accel/accel-common.c +++ b/accel/accel-common.c @@ -124,7 +124,7 @@ int accel_supported_gdbstub_sstep_flags(void) AccelState *accel = current_accel(); AccelClass *acc = ACCEL_GET_CLASS(accel); if (acc->gdbstub_supported_sstep_flags) { - return acc->gdbstub_supported_sstep_flags(); + return acc->gdbstub_supported_sstep_flags(accel); } return 0; } diff --git a/accel/accel-system.c b/accel/accel-system.c index a0f562a..af713cc 100644 --- a/accel/accel-system.c +++ b/accel/accel-system.c @@ -37,7 +37,7 @@ int accel_init_machine(AccelState *accel, MachineState *ms) int ret; ms->accelerator = accel; *(acc->allowed) = true; - ret = acc->init_machine(ms); + ret = acc->init_machine(accel, ms); if (ret < 0) { ms->accelerator = NULL; *(acc->allowed) = false; @@ -58,7 +58,7 @@ void accel_setup_post(MachineState *ms) AccelState *accel = ms->accelerator; AccelClass *acc = ACCEL_GET_CLASS(accel); if (acc->setup_post) { - acc->setup_post(ms, accel); + acc->setup_post(accel); } } @@ -85,8 +85,9 @@ void accel_init_ops_interfaces(AccelClass *ac) * non-NULL create_vcpu_thread operation. */ ops = ACCEL_OPS_CLASS(oc); + ac->ops = ops; if (ops->ops_init) { - ops->ops_init(ops); + ops->ops_init(ac); } cpus_register_accel(ops); } diff --git a/accel/dummy-cpus.c b/accel/dummy-cpus.c index 8672761..03cfc0f 100644 --- a/accel/dummy-cpus.c +++ b/accel/dummy-cpus.c @@ -17,6 +17,7 @@ #include "qemu/guest-random.h" #include "qemu/main-loop.h" #include "hw/core/cpu.h" +#include "accel/dummy-cpus.h" static void *dummy_cpu_thread_fn(void *arg) { diff --git a/accel/dummy-cpus.h b/accel/dummy-cpus.h new file mode 100644 index 0000000..d18dd0f --- /dev/null +++ b/accel/dummy-cpus.h @@ -0,0 +1,14 @@ +/* + * Dummy cpu thread code + * + * Copyright IBM, Corp. 2011 + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef ACCEL_DUMMY_CPUS_H +#define ACCEL_DUMMY_CPUS_H + +void dummy_start_vcpu_thread(CPUState *cpu); + +#endif diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c index b389772..be8724a 100644 --- a/accel/hvf/hvf-accel-ops.c +++ b/accel/hvf/hvf-accel-ops.c @@ -48,18 +48,16 @@ */ #include "qemu/osdep.h" -#include "qemu/error-report.h" +#include "qemu/guest-random.h" #include "qemu/main-loop.h" -#include "system/address-spaces.h" +#include "qemu/queue.h" #include "gdbstub/enums.h" -#include "hw/boards.h" +#include "exec/cpu-common.h" +#include "hw/core/cpu.h" #include "system/accel-ops.h" #include "system/cpus.h" #include "system/hvf.h" #include "system/hvf_int.h" -#include "system/runstate.h" -#include "qemu/guest-random.h" -#include "trace.h" HVFState *hvf_state; @@ -79,143 +77,17 @@ hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t size) return NULL; } -struct mac_slot { - int present; - uint64_t size; - uint64_t gpa_start; - uint64_t gva; -}; - -struct mac_slot mac_slots[32]; - -static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags) -{ - struct mac_slot *macslot; - hv_return_t ret; - - macslot = &mac_slots[slot->slot_id]; - - if (macslot->present) { - if (macslot->size != slot->size) { - macslot->present = 0; - trace_hvf_vm_unmap(macslot->gpa_start, macslot->size); - ret = hv_vm_unmap(macslot->gpa_start, macslot->size); - assert_hvf_ok(ret); - } - } - - if (!slot->size) { - return 0; - } - - macslot->present = 1; - macslot->gpa_start = slot->start; - macslot->size = slot->size; - trace_hvf_vm_map(slot->start, slot->size, slot->mem, flags, - flags & HV_MEMORY_READ ? 'R' : '-', - flags & HV_MEMORY_WRITE ? 'W' : '-', - flags & HV_MEMORY_EXEC ? 'E' : '-'); - ret = hv_vm_map(slot->mem, slot->start, slot->size, flags); - assert_hvf_ok(ret); - return 0; -} - -static void hvf_set_phys_mem(MemoryRegionSection *section, bool add) -{ - hvf_slot *mem; - MemoryRegion *area = section->mr; - bool writable = !area->readonly && !area->rom_device; - hv_memory_flags_t flags; - uint64_t page_size = qemu_real_host_page_size(); - - if (!memory_region_is_ram(area)) { - if (writable) { - return; - } else if (!memory_region_is_romd(area)) { - /* - * If the memory device is not in romd_mode, then we actually want - * to remove the hvf memory slot so all accesses will trap. - */ - add = false; - } - } - - if (!QEMU_IS_ALIGNED(int128_get64(section->size), page_size) || - !QEMU_IS_ALIGNED(section->offset_within_address_space, page_size)) { - /* Not page aligned, so we can not map as RAM */ - add = false; - } - - mem = hvf_find_overlap_slot( - section->offset_within_address_space, - int128_get64(section->size)); - - if (mem && add) { - if (mem->size == int128_get64(section->size) && - mem->start == section->offset_within_address_space && - mem->mem == (memory_region_get_ram_ptr(area) + - section->offset_within_region)) { - return; /* Same region was attempted to register, go away. */ - } - } - - /* Region needs to be reset. set the size to 0 and remap it. */ - if (mem) { - mem->size = 0; - if (do_hvf_set_memory(mem, 0)) { - error_report("Failed to reset overlapping slot"); - abort(); - } - } - - if (!add) { - return; - } - - if (area->readonly || - (!memory_region_is_ram(area) && memory_region_is_romd(area))) { - flags = HV_MEMORY_READ | HV_MEMORY_EXEC; - } else { - flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC; - } - - /* Now make a new slot. */ - int x; - - for (x = 0; x < hvf_state->num_slots; ++x) { - mem = &hvf_state->slots[x]; - if (!mem->size) { - break; - } - } - - if (x == hvf_state->num_slots) { - error_report("No free slots"); - abort(); - } - - mem->size = int128_get64(section->size); - mem->mem = memory_region_get_ram_ptr(area) + section->offset_within_region; - mem->start = section->offset_within_address_space; - mem->region = area; - - if (do_hvf_set_memory(mem, flags)) { - error_report("Error registering new memory slot"); - abort(); - } -} - static void do_hvf_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) { - if (!cpu->accel->dirty) { + if (!cpu->vcpu_dirty) { hvf_get_registers(cpu); - cpu->accel->dirty = true; + cpu->vcpu_dirty = true; } } static void hvf_cpu_synchronize_state(CPUState *cpu) { - if (!cpu->accel->dirty) { + if (!cpu->vcpu_dirty) { run_on_cpu(cpu, do_hvf_cpu_synchronize_state, RUN_ON_CPU_NULL); } } @@ -224,7 +96,7 @@ static void do_hvf_cpu_synchronize_set_dirty(CPUState *cpu, run_on_cpu_data arg) { /* QEMU state is the reference, push it to HVF now and on next entry */ - cpu->accel->dirty = true; + cpu->vcpu_dirty = true; } static void hvf_cpu_synchronize_post_reset(CPUState *cpu) @@ -242,147 +114,10 @@ static void hvf_cpu_synchronize_pre_loadvm(CPUState *cpu) run_on_cpu(cpu, do_hvf_cpu_synchronize_set_dirty, RUN_ON_CPU_NULL); } -static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on) -{ - hvf_slot *slot; - - slot = hvf_find_overlap_slot( - section->offset_within_address_space, - int128_get64(section->size)); - - /* protect region against writes; begin tracking it */ - if (on) { - slot->flags |= HVF_SLOT_LOG; - hv_vm_protect((uintptr_t)slot->start, (size_t)slot->size, - HV_MEMORY_READ | HV_MEMORY_EXEC); - /* stop tracking region*/ - } else { - slot->flags &= ~HVF_SLOT_LOG; - hv_vm_protect((uintptr_t)slot->start, (size_t)slot->size, - HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC); - } -} - -static void hvf_log_start(MemoryListener *listener, - MemoryRegionSection *section, int old, int new) -{ - if (old != 0) { - return; - } - - hvf_set_dirty_tracking(section, 1); -} - -static void hvf_log_stop(MemoryListener *listener, - MemoryRegionSection *section, int old, int new) -{ - if (new != 0) { - return; - } - - hvf_set_dirty_tracking(section, 0); -} - -static void hvf_log_sync(MemoryListener *listener, - MemoryRegionSection *section) -{ - /* - * sync of dirty pages is handled elsewhere; just make sure we keep - * tracking the region. - */ - hvf_set_dirty_tracking(section, 1); -} - -static void hvf_region_add(MemoryListener *listener, - MemoryRegionSection *section) -{ - hvf_set_phys_mem(section, true); -} - -static void hvf_region_del(MemoryListener *listener, - MemoryRegionSection *section) -{ - hvf_set_phys_mem(section, false); -} - -static MemoryListener hvf_memory_listener = { - .name = "hvf", - .priority = MEMORY_LISTENER_PRIORITY_ACCEL, - .region_add = hvf_region_add, - .region_del = hvf_region_del, - .log_start = hvf_log_start, - .log_stop = hvf_log_stop, - .log_sync = hvf_log_sync, -}; - static void dummy_signal(int sig) { } -bool hvf_allowed; - -static int hvf_accel_init(MachineState *ms) -{ - int x; - hv_return_t ret; - HVFState *s; - int pa_range = 36; - MachineClass *mc = MACHINE_GET_CLASS(ms); - - if (mc->hvf_get_physical_address_range) { - pa_range = mc->hvf_get_physical_address_range(ms); - if (pa_range < 0) { - return -EINVAL; - } - } - - ret = hvf_arch_vm_create(ms, (uint32_t)pa_range); - assert_hvf_ok(ret); - - s = g_new0(HVFState, 1); - - s->num_slots = ARRAY_SIZE(s->slots); - for (x = 0; x < s->num_slots; ++x) { - s->slots[x].size = 0; - s->slots[x].slot_id = x; - } - - QTAILQ_INIT(&s->hvf_sw_breakpoints); - - hvf_state = s; - memory_listener_register(&hvf_memory_listener, &address_space_memory); - - return hvf_arch_init(); -} - -static inline int hvf_gdbstub_sstep_flags(void) -{ - return SSTEP_ENABLE | SSTEP_NOIRQ; -} - -static void hvf_accel_class_init(ObjectClass *oc, const void *data) -{ - AccelClass *ac = ACCEL_CLASS(oc); - ac->name = "HVF"; - ac->init_machine = hvf_accel_init; - ac->allowed = &hvf_allowed; - ac->gdbstub_supported_sstep_flags = hvf_gdbstub_sstep_flags; -} - -static const TypeInfo hvf_accel_type = { - .name = TYPE_HVF_ACCEL, - .parent = TYPE_ACCEL, - .instance_size = sizeof(HVFState), - .class_init = hvf_accel_class_init, -}; - -static void hvf_type_init(void) -{ - type_register_static(&hvf_accel_type); -} - -type_init(hvf_type_init); - static void hvf_vcpu_destroy(CPUState *cpu) { hv_return_t ret = hv_vcpu_destroy(cpu->accel->fd); @@ -415,8 +150,8 @@ static int hvf_init_vcpu(CPUState *cpu) #else r = hv_vcpu_create(&cpu->accel->fd, HV_VCPU_DEFAULT); #endif - cpu->accel->dirty = true; assert_hvf_ok(r); + cpu->vcpu_dirty = true; cpu->accel->guest_debug_enabled = false; @@ -482,6 +217,34 @@ static void hvf_start_vcpu_thread(CPUState *cpu) cpu, QEMU_THREAD_JOINABLE); } +struct hvf_sw_breakpoint *hvf_find_sw_breakpoint(CPUState *cpu, vaddr pc) +{ + struct hvf_sw_breakpoint *bp; + + QTAILQ_FOREACH(bp, &hvf_state->hvf_sw_breakpoints, entry) { + if (bp->pc == pc) { + return bp; + } + } + return NULL; +} + +int hvf_sw_breakpoints_active(CPUState *cpu) +{ + return !QTAILQ_EMPTY(&hvf_state->hvf_sw_breakpoints); +} + +static void do_hvf_update_guest_debug(CPUState *cpu, run_on_cpu_data arg) +{ + hvf_arch_update_guest_debug(cpu); +} + +int hvf_update_guest_debug(CPUState *cpu) +{ + run_on_cpu(cpu, do_hvf_update_guest_debug, RUN_ON_CPU_NULL); + return 0; +} + static int hvf_insert_breakpoint(CPUState *cpu, int type, vaddr addr, vaddr len) { struct hvf_sw_breakpoint *bp; @@ -590,6 +353,7 @@ static void hvf_accel_ops_class_init(ObjectClass *oc, const void *data) ops->create_vcpu_thread = hvf_start_vcpu_thread; ops->kick_vcpu_thread = hvf_kick_vcpu_thread; + ops->handle_interrupt = generic_handle_interrupt; ops->synchronize_post_reset = hvf_cpu_synchronize_post_reset; ops->synchronize_post_init = hvf_cpu_synchronize_post_init; @@ -609,8 +373,10 @@ static const TypeInfo hvf_accel_ops_type = { .class_init = hvf_accel_ops_class_init, .abstract = true, }; + static void hvf_accel_ops_register_types(void) { type_register_static(&hvf_accel_ops_type); } + type_init(hvf_accel_ops_register_types); diff --git a/accel/hvf/hvf-all.c b/accel/hvf/hvf-all.c index 8c387fd..b6075c0 100644 --- a/accel/hvf/hvf-all.c +++ b/accel/hvf/hvf-all.c @@ -10,9 +10,24 @@ #include "qemu/osdep.h" #include "qemu/error-report.h" +#include "system/address-spaces.h" +#include "system/memory.h" #include "system/hvf.h" #include "system/hvf_int.h" #include "hw/core/cpu.h" +#include "hw/boards.h" +#include "trace.h" + +bool hvf_allowed; + +struct mac_slot { + int present; + uint64_t size; + uint64_t gpa_start; + uint64_t gva; +}; + +struct mac_slot mac_slots[32]; const char *hvf_return_string(hv_return_t ret) { @@ -42,30 +57,254 @@ void assert_hvf_ok_impl(hv_return_t ret, const char *file, unsigned int line, abort(); } -struct hvf_sw_breakpoint *hvf_find_sw_breakpoint(CPUState *cpu, vaddr pc) +static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags) { - struct hvf_sw_breakpoint *bp; + struct mac_slot *macslot; + hv_return_t ret; - QTAILQ_FOREACH(bp, &hvf_state->hvf_sw_breakpoints, entry) { - if (bp->pc == pc) { - return bp; + macslot = &mac_slots[slot->slot_id]; + + if (macslot->present) { + if (macslot->size != slot->size) { + macslot->present = 0; + trace_hvf_vm_unmap(macslot->gpa_start, macslot->size); + ret = hv_vm_unmap(macslot->gpa_start, macslot->size); + assert_hvf_ok(ret); } } - return NULL; + + if (!slot->size) { + return 0; + } + + macslot->present = 1; + macslot->gpa_start = slot->start; + macslot->size = slot->size; + trace_hvf_vm_map(slot->start, slot->size, slot->mem, flags, + flags & HV_MEMORY_READ ? 'R' : '-', + flags & HV_MEMORY_WRITE ? 'W' : '-', + flags & HV_MEMORY_EXEC ? 'E' : '-'); + ret = hv_vm_map(slot->mem, slot->start, slot->size, flags); + assert_hvf_ok(ret); + return 0; } -int hvf_sw_breakpoints_active(CPUState *cpu) +static void hvf_set_phys_mem(MemoryRegionSection *section, bool add) { - return !QTAILQ_EMPTY(&hvf_state->hvf_sw_breakpoints); + hvf_slot *mem; + MemoryRegion *area = section->mr; + bool writable = !area->readonly && !area->rom_device; + hv_memory_flags_t flags; + uint64_t page_size = qemu_real_host_page_size(); + + if (!memory_region_is_ram(area)) { + if (writable) { + return; + } else if (!memory_region_is_romd(area)) { + /* + * If the memory device is not in romd_mode, then we actually want + * to remove the hvf memory slot so all accesses will trap. + */ + add = false; + } + } + + if (!QEMU_IS_ALIGNED(int128_get64(section->size), page_size) || + !QEMU_IS_ALIGNED(section->offset_within_address_space, page_size)) { + /* Not page aligned, so we can not map as RAM */ + add = false; + } + + mem = hvf_find_overlap_slot( + section->offset_within_address_space, + int128_get64(section->size)); + + if (mem && add) { + if (mem->size == int128_get64(section->size) && + mem->start == section->offset_within_address_space && + mem->mem == (memory_region_get_ram_ptr(area) + + section->offset_within_region)) { + return; /* Same region was attempted to register, go away. */ + } + } + + /* Region needs to be reset. set the size to 0 and remap it. */ + if (mem) { + mem->size = 0; + if (do_hvf_set_memory(mem, 0)) { + error_report("Failed to reset overlapping slot"); + abort(); + } + } + + if (!add) { + return; + } + + if (area->readonly || + (!memory_region_is_ram(area) && memory_region_is_romd(area))) { + flags = HV_MEMORY_READ | HV_MEMORY_EXEC; + } else { + flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC; + } + + /* Now make a new slot. */ + int x; + + for (x = 0; x < hvf_state->num_slots; ++x) { + mem = &hvf_state->slots[x]; + if (!mem->size) { + break; + } + } + + if (x == hvf_state->num_slots) { + error_report("No free slots"); + abort(); + } + + mem->size = int128_get64(section->size); + mem->mem = memory_region_get_ram_ptr(area) + section->offset_within_region; + mem->start = section->offset_within_address_space; + mem->region = area; + + if (do_hvf_set_memory(mem, flags)) { + error_report("Error registering new memory slot"); + abort(); + } } -static void do_hvf_update_guest_debug(CPUState *cpu, run_on_cpu_data arg) +static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on) { - hvf_arch_update_guest_debug(cpu); + hvf_slot *slot; + + slot = hvf_find_overlap_slot( + section->offset_within_address_space, + int128_get64(section->size)); + + /* protect region against writes; begin tracking it */ + if (on) { + slot->flags |= HVF_SLOT_LOG; + hv_vm_protect((uintptr_t)slot->start, (size_t)slot->size, + HV_MEMORY_READ | HV_MEMORY_EXEC); + /* stop tracking region*/ + } else { + slot->flags &= ~HVF_SLOT_LOG; + hv_vm_protect((uintptr_t)slot->start, (size_t)slot->size, + HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC); + } } -int hvf_update_guest_debug(CPUState *cpu) +static void hvf_log_start(MemoryListener *listener, + MemoryRegionSection *section, int old, int new) { - run_on_cpu(cpu, do_hvf_update_guest_debug, RUN_ON_CPU_NULL); - return 0; + if (old != 0) { + return; + } + + hvf_set_dirty_tracking(section, 1); } + +static void hvf_log_stop(MemoryListener *listener, + MemoryRegionSection *section, int old, int new) +{ + if (new != 0) { + return; + } + + hvf_set_dirty_tracking(section, 0); +} + +static void hvf_log_sync(MemoryListener *listener, + MemoryRegionSection *section) +{ + /* + * sync of dirty pages is handled elsewhere; just make sure we keep + * tracking the region. + */ + hvf_set_dirty_tracking(section, 1); +} + +static void hvf_region_add(MemoryListener *listener, + MemoryRegionSection *section) +{ + hvf_set_phys_mem(section, true); +} + +static void hvf_region_del(MemoryListener *listener, + MemoryRegionSection *section) +{ + hvf_set_phys_mem(section, false); +} + +static MemoryListener hvf_memory_listener = { + .name = "hvf", + .priority = MEMORY_LISTENER_PRIORITY_ACCEL, + .region_add = hvf_region_add, + .region_del = hvf_region_del, + .log_start = hvf_log_start, + .log_stop = hvf_log_stop, + .log_sync = hvf_log_sync, +}; + +static int hvf_accel_init(AccelState *as, MachineState *ms) +{ + int x; + hv_return_t ret; + HVFState *s; + int pa_range = 36; + MachineClass *mc = MACHINE_GET_CLASS(ms); + + if (mc->hvf_get_physical_address_range) { + pa_range = mc->hvf_get_physical_address_range(ms); + if (pa_range < 0) { + return -EINVAL; + } + } + + ret = hvf_arch_vm_create(ms, (uint32_t)pa_range); + assert_hvf_ok(ret); + + s = g_new0(HVFState, 1); + + s->num_slots = ARRAY_SIZE(s->slots); + for (x = 0; x < s->num_slots; ++x) { + s->slots[x].size = 0; + s->slots[x].slot_id = x; + } + + QTAILQ_INIT(&s->hvf_sw_breakpoints); + + hvf_state = s; + memory_listener_register(&hvf_memory_listener, &address_space_memory); + + return hvf_arch_init(); +} + +static int hvf_gdbstub_sstep_flags(AccelState *as) +{ + return SSTEP_ENABLE | SSTEP_NOIRQ; +} + +static void hvf_accel_class_init(ObjectClass *oc, const void *data) +{ + AccelClass *ac = ACCEL_CLASS(oc); + ac->name = "HVF"; + ac->init_machine = hvf_accel_init; + ac->allowed = &hvf_allowed; + ac->gdbstub_supported_sstep_flags = hvf_gdbstub_sstep_flags; +} + +static const TypeInfo hvf_accel_type = { + .name = TYPE_HVF_ACCEL, + .parent = TYPE_ACCEL, + .instance_size = sizeof(HVFState), + .class_init = hvf_accel_class_init, +}; + +static void hvf_type_init(void) +{ + type_register_static(&hvf_accel_type); +} + +type_init(hvf_type_init); diff --git a/accel/kvm/kvm-accel-ops.c b/accel/kvm/kvm-accel-ops.c index e5c1544..0eafc90 100644 --- a/accel/kvm/kvm-accel-ops.c +++ b/accel/kvm/kvm-accel-ops.c @@ -101,6 +101,7 @@ static void kvm_accel_ops_class_init(ObjectClass *oc, const void *data) ops->synchronize_post_init = kvm_cpu_synchronize_post_init; ops->synchronize_state = kvm_cpu_synchronize_state; ops->synchronize_pre_loadvm = kvm_cpu_synchronize_pre_loadvm; + ops->handle_interrupt = generic_handle_interrupt; #ifdef TARGET_KVM_HAVE_GUEST_DEBUG ops->update_guest_debug = kvm_update_guest_debug_ops; diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index d095d1b..a106d1b 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -453,7 +453,13 @@ static void kvm_reset_parked_vcpus(KVMState *s) } } -int kvm_create_vcpu(CPUState *cpu) +/** + * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU + * @cpu: QOM CPUState object for which KVM vCPU has to be fetched/created. + * + * @returns: 0 when success, errno (<0) when failed. + */ +static int kvm_create_vcpu(CPUState *cpu) { unsigned long vcpu_id = kvm_arch_vcpu_id(cpu); KVMState *s = kvm_state; @@ -515,16 +521,23 @@ static int do_kvm_destroy_vcpu(CPUState *cpu) goto err; } + /* If I am the CPU that created coalesced_mmio_ring, then discard it */ + if (s->coalesced_mmio_ring == (void *)cpu->kvm_run + PAGE_SIZE) { + s->coalesced_mmio_ring = NULL; + } + ret = munmap(cpu->kvm_run, mmap_size); if (ret < 0) { goto err; } + cpu->kvm_run = NULL; if (cpu->kvm_dirty_gfns) { ret = munmap(cpu->kvm_dirty_gfns, s->kvm_dirty_ring_bytes); if (ret < 0) { goto err; } + cpu->kvm_dirty_gfns = NULL; } kvm_park_vcpu(cpu); @@ -608,6 +621,31 @@ err: return ret; } +void kvm_close(void) +{ + CPUState *cpu; + + if (!kvm_state || kvm_state->fd == -1) { + return; + } + + CPU_FOREACH(cpu) { + cpu_remove_sync(cpu); + close(cpu->kvm_fd); + cpu->kvm_fd = -1; + close(cpu->kvm_vcpu_stats_fd); + cpu->kvm_vcpu_stats_fd = -1; + } + + if (kvm_state && kvm_state->fd != -1) { + close(kvm_state->vmfd); + kvm_state->vmfd = -1; + close(kvm_state->fd); + kvm_state->fd = -1; + } + kvm_state = NULL; +} + /* * dirty pages logging control */ @@ -2464,13 +2502,10 @@ uint32_t kvm_dirty_ring_size(void) return kvm_state->kvm_dirty_ring_size; } -static int do_kvm_create_vm(MachineState *ms, int type) +static int do_kvm_create_vm(KVMState *s, int type) { - KVMState *s; int ret; - s = KVM_STATE(ms->accelerator); - do { ret = kvm_ioctl(s, KVM_CREATE_VM, type); } while (ret == -EINTR); @@ -2567,7 +2602,7 @@ static int kvm_setup_dirty_ring(KVMState *s) return 0; } -static int kvm_init(MachineState *ms) +static int kvm_init(AccelState *as, MachineState *ms) { MachineClass *mc = MACHINE_GET_CLASS(ms); static const char upgrade_note[] = @@ -2582,15 +2617,13 @@ static int kvm_init(MachineState *ms) { /* end of list */ } }, *nc = num_cpus; int soft_vcpus_limit, hard_vcpus_limit; - KVMState *s; + KVMState *s = KVM_STATE(as); const KVMCapabilityInfo *missing_cap; int ret; int type; qemu_mutex_init(&kml_slots_lock); - s = KVM_STATE(ms->accelerator); - /* * On systems where the kernel can support different base page * sizes, host page size may be different from TARGET_PAGE_SIZE, @@ -2642,7 +2675,7 @@ static int kvm_init(MachineState *ms) goto err; } - ret = do_kvm_create_vm(ms, type); + ret = do_kvm_create_vm(s, type); if (ret < 0) { goto err; } @@ -3785,10 +3818,10 @@ int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target) return r; } -static bool kvm_accel_has_memory(MachineState *ms, AddressSpace *as, +static bool kvm_accel_has_memory(AccelState *accel, AddressSpace *as, hwaddr start_addr, hwaddr size) { - KVMState *kvm = KVM_STATE(ms->accelerator); + KVMState *kvm = KVM_STATE(accel); int i; for (i = 0; i < kvm->nr_as; ++i) { @@ -3979,7 +4012,7 @@ static void kvm_accel_instance_init(Object *obj) * Returns: SSTEP_* flags that KVM supports for guest debug. The * support is probed during kvm_init() */ -static int kvm_gdbstub_sstep_flags(void) +static int kvm_gdbstub_sstep_flags(AccelState *as) { return kvm_sstep_flags; } diff --git a/accel/qtest/qtest.c b/accel/qtest/qtest.c index 92bed92..2b83126 100644 --- a/accel/qtest/qtest.c +++ b/accel/qtest/qtest.c @@ -24,6 +24,7 @@ #include "qemu/guest-random.h" #include "qemu/main-loop.h" #include "hw/core/cpu.h" +#include "accel/dummy-cpus.h" static int64_t qtest_clock_counter; @@ -37,7 +38,7 @@ static void qtest_set_virtual_clock(int64_t count) qatomic_set_i64(&qtest_clock_counter, count); } -static int qtest_init_accel(MachineState *ms) +static int qtest_init_accel(AccelState *as, MachineState *ms) { return 0; } @@ -66,6 +67,7 @@ static void qtest_accel_ops_class_init(ObjectClass *oc, const void *data) ops->create_vcpu_thread = dummy_start_vcpu_thread; ops->get_virtual_clock = qtest_get_virtual_clock; ops->set_virtual_clock = qtest_set_virtual_clock; + ops->handle_interrupt = generic_handle_interrupt; }; static const TypeInfo qtest_accel_ops_type = { diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c index ecfd763..68cd33b 100644 --- a/accel/stubs/kvm-stub.c +++ b/accel/stubs/kvm-stub.c @@ -29,10 +29,6 @@ void kvm_flush_coalesced_mmio_buffer(void) { } -void kvm_cpu_synchronize_state(CPUState *cpu) -{ -} - bool kvm_has_sync_mmu(void) { return false; @@ -105,11 +101,6 @@ unsigned int kvm_get_free_memslots(void) return 0; } -void kvm_init_cpu_signals(CPUState *cpu) -{ - abort(); -} - bool kvm_arm_supports_user_irq(void) { return false; diff --git a/accel/stubs/meson.build b/accel/stubs/meson.build index 8ca1a45..9dfc4f9 100644 --- a/accel/stubs/meson.build +++ b/accel/stubs/meson.build @@ -3,5 +3,7 @@ system_stubs_ss.add(when: 'CONFIG_XEN', if_false: files('xen-stub.c')) system_stubs_ss.add(when: 'CONFIG_KVM', if_false: files('kvm-stub.c')) system_stubs_ss.add(when: 'CONFIG_TCG', if_false: files('tcg-stub.c')) system_stubs_ss.add(when: 'CONFIG_HVF', if_false: files('hvf-stub.c')) +system_stubs_ss.add(when: 'CONFIG_NVMM', if_false: files('nvmm-stub.c')) +system_stubs_ss.add(when: 'CONFIG_WHPX', if_false: files('whpx-stub.c')) specific_ss.add_all(when: ['CONFIG_SYSTEM_ONLY'], if_true: system_stubs_ss) diff --git a/accel/stubs/nvmm-stub.c b/accel/stubs/nvmm-stub.c new file mode 100644 index 0000000..ec14837 --- /dev/null +++ b/accel/stubs/nvmm-stub.c @@ -0,0 +1,12 @@ +/* + * NVMM stubs for QEMU + * + * Copyright (c) Linaro + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "system/nvmm.h" + +bool nvmm_allowed; diff --git a/accel/stubs/whpx-stub.c b/accel/stubs/whpx-stub.c new file mode 100644 index 0000000..c564c89 --- /dev/null +++ b/accel/stubs/whpx-stub.c @@ -0,0 +1,12 @@ +/* + * WHPX stubs for QEMU + * + * Copyright (c) Linaro + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "system/whpx.h" + +bool whpx_allowed; diff --git a/accel/tcg/internal-common.h b/accel/tcg/internal-common.h index 1dbc45d..77a3a06 100644 --- a/accel/tcg/internal-common.h +++ b/accel/tcg/internal-common.h @@ -139,4 +139,6 @@ G_NORETURN void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr); void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr); void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr); +void tcg_dump_stats(GString *buf); + #endif diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c index 1c182b6..e7ed728 100644 --- a/accel/tcg/monitor.c +++ b/accel/tcg/monitor.c @@ -141,16 +141,26 @@ static void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide) *pelide = elide; } -static void tcg_dump_info(GString *buf) +static void tcg_dump_flush_info(GString *buf) { - g_string_append_printf(buf, "[TCG profiler not compiled]\n"); + size_t flush_full, flush_part, flush_elide; + + g_string_append_printf(buf, "TB flush count %u\n", + qatomic_read(&tb_ctx.tb_flush_count)); + g_string_append_printf(buf, "TB invalidate count %u\n", + qatomic_read(&tb_ctx.tb_phys_invalidate_count)); + + tlb_flush_counts(&flush_full, &flush_part, &flush_elide); + g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full); + g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part); + g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide); } static void dump_exec_info(GString *buf) { struct tb_tree_stats tst = {}; struct qht_stats hst; - size_t nb_tbs, flush_full, flush_part, flush_elide; + size_t nb_tbs; tcg_tb_foreach(tb_tree_stats_iter, &tst); nb_tbs = tst.nb_tbs; @@ -187,50 +197,26 @@ static void dump_exec_info(GString *buf) qht_statistics_destroy(&hst); g_string_append_printf(buf, "\nStatistics:\n"); - g_string_append_printf(buf, "TB flush count %u\n", - qatomic_read(&tb_ctx.tb_flush_count)); - g_string_append_printf(buf, "TB invalidate count %u\n", - qatomic_read(&tb_ctx.tb_phys_invalidate_count)); - - tlb_flush_counts(&flush_full, &flush_part, &flush_elide); - g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full); - g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part); - g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide); - tcg_dump_info(buf); + tcg_dump_flush_info(buf); } -HumanReadableText *qmp_x_query_jit(Error **errp) +void tcg_dump_stats(GString *buf) { - g_autoptr(GString) buf = g_string_new(""); - - if (!tcg_enabled()) { - error_setg(errp, "JIT information is only available with accel=tcg"); - return NULL; - } - dump_accel_info(buf); dump_exec_info(buf); dump_drift_info(buf); - - return human_readable_text_from_str(buf); -} - -static void tcg_dump_op_count(GString *buf) -{ - g_string_append_printf(buf, "[TCG profiler not compiled]\n"); } -HumanReadableText *qmp_x_query_opcount(Error **errp) +HumanReadableText *qmp_x_query_jit(Error **errp) { g_autoptr(GString) buf = g_string_new(""); if (!tcg_enabled()) { - error_setg(errp, - "Opcode count information is only available with accel=tcg"); + error_setg(errp, "JIT information is only available with accel=tcg"); return NULL; } - tcg_dump_op_count(buf); + tcg_dump_stats(buf); return human_readable_text_from_str(buf); } @@ -238,7 +224,6 @@ HumanReadableText *qmp_x_query_opcount(Error **errp) static void hmp_tcg_register(void) { monitor_register_hmp_info_hrt("jit", qmp_x_query_jit); - monitor_register_hmp_info_hrt("opcount", qmp_x_query_opcount); } type_init(hmp_tcg_register); diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c index b24d6a7..37b4b21 100644 --- a/accel/tcg/tcg-accel-ops.c +++ b/accel/tcg/tcg-accel-ops.c @@ -93,8 +93,6 @@ static void tcg_cpu_reset_hold(CPUState *cpu) /* mask must never be zero, except for A20 change call */ void tcg_handle_interrupt(CPUState *cpu, int mask) { - g_assert(bql_locked()); - cpu->interrupt_request |= mask; /* @@ -198,8 +196,10 @@ static inline void tcg_remove_all_breakpoints(CPUState *cpu) cpu_watchpoint_remove_all(cpu, BP_GDB); } -static void tcg_accel_ops_init(AccelOpsClass *ops) +static void tcg_accel_ops_init(AccelClass *ac) { + AccelOpsClass *ops = ac->ops; + if (qemu_tcg_mttcg_enabled()) { ops->create_vcpu_thread = mttcg_start_vcpu_thread; ops->kick_vcpu_thread = mttcg_kick_vcpu_thread; diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c index 6e5dc33..5904582 100644 --- a/accel/tcg/tcg-all.c +++ b/accel/tcg/tcg-all.c @@ -80,9 +80,9 @@ static void tcg_accel_instance_init(Object *obj) bool one_insn_per_tb; -static int tcg_init_machine(MachineState *ms) +static int tcg_init_machine(AccelState *as, MachineState *ms) { - TCGState *s = TCG_STATE(current_accel()); + TCGState *s = TCG_STATE(as); unsigned max_threads = 1; #ifndef CONFIG_USER_ONLY @@ -219,7 +219,7 @@ static void tcg_set_one_insn_per_tb(Object *obj, bool value, Error **errp) qatomic_set(&one_insn_per_tb, value); } -static int tcg_gdbstub_supported_sstep_flags(void) +static int tcg_gdbstub_supported_sstep_flags(AccelState *as) { /* * In replay mode all events will come from the log and can't be diff --git a/accel/xen/xen-all.c b/accel/xen/xen-all.c index de52a8f..bd0ff64 100644 --- a/accel/xen/xen-all.c +++ b/accel/xen/xen-all.c @@ -18,6 +18,7 @@ #include "hw/xen/xen_igd.h" #include "chardev/char.h" #include "qemu/accel.h" +#include "accel/dummy-cpus.h" #include "system/accel-ops.h" #include "system/cpus.h" #include "system/xen.h" @@ -63,7 +64,7 @@ static void xen_set_igd_gfx_passthru(Object *obj, bool value, Error **errp) xen_igd_gfx_pt_set(value, errp); } -static void xen_setup_post(MachineState *ms, AccelState *accel) +static void xen_setup_post(AccelState *as) { int rc; @@ -76,7 +77,7 @@ static void xen_setup_post(MachineState *ms, AccelState *accel) } } -static int xen_init(MachineState *ms) +static int xen_init(AccelState *as, MachineState *ms) { MachineClass *mc = MACHINE_GET_CLASS(ms); @@ -152,6 +153,7 @@ static void xen_accel_ops_class_init(ObjectClass *oc, const void *data) AccelOpsClass *ops = ACCEL_OPS_CLASS(oc); ops->create_vcpu_thread = dummy_start_vcpu_thread; + ops->handle_interrupt = generic_handle_interrupt; } static const TypeInfo xen_accel_ops_type = { diff --git a/backends/iommufd.c b/backends/iommufd.c index c2c47ab..2a33c7a 100644 --- a/backends/iommufd.c +++ b/backends/iommufd.c @@ -16,12 +16,18 @@ #include "qemu/module.h" #include "qom/object_interfaces.h" #include "qemu/error-report.h" +#include "migration/cpr.h" #include "monitor/monitor.h" #include "trace.h" #include "hw/vfio/vfio-device.h" #include <sys/ioctl.h> #include <linux/iommufd.h> +static const char *iommufd_fd_name(IOMMUFDBackend *be) +{ + return object_get_canonical_path_component(OBJECT(be)); +} + static void iommufd_backend_init(Object *obj) { IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); @@ -64,26 +70,73 @@ static bool iommufd_backend_can_be_deleted(UserCreatable *uc) return !be->users; } +static void iommufd_backend_complete(UserCreatable *uc, Error **errp) +{ + IOMMUFDBackend *be = IOMMUFD_BACKEND(uc); + const char *name = iommufd_fd_name(be); + + if (!be->owned) { + /* fd came from the command line. Fetch updated value from cpr state. */ + if (cpr_is_incoming()) { + be->fd = cpr_find_fd(name, 0); + } else { + cpr_save_fd(name, 0, be->fd); + } + } +} + static void iommufd_backend_class_init(ObjectClass *oc, const void *data) { UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); ucc->can_be_deleted = iommufd_backend_can_be_deleted; + ucc->complete = iommufd_backend_complete; object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd); } +bool iommufd_change_process_capable(IOMMUFDBackend *be) +{ + struct iommu_ioas_change_process args = {.size = sizeof(args)}; + + /* + * Call IOMMU_IOAS_CHANGE_PROCESS to verify it is a recognized ioctl. + * This is a no-op if the process has not changed since DMA was mapped. + */ + return !ioctl(be->fd, IOMMU_IOAS_CHANGE_PROCESS, &args); +} + +bool iommufd_change_process(IOMMUFDBackend *be, Error **errp) +{ + struct iommu_ioas_change_process args = {.size = sizeof(args)}; + bool ret = !ioctl(be->fd, IOMMU_IOAS_CHANGE_PROCESS, &args); + + if (!ret) { + error_setg_errno(errp, errno, "IOMMU_IOAS_CHANGE_PROCESS fd %d failed", + be->fd); + } + trace_iommufd_change_process(be->fd, ret); + return ret; +} + bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) { int fd; if (be->owned && !be->users) { - fd = qemu_open("/dev/iommu", O_RDWR, errp); + fd = cpr_open_fd("/dev/iommu", O_RDWR, iommufd_fd_name(be), 0, errp); if (fd < 0) { return false; } be->fd = fd; } + if (!be->users && !vfio_iommufd_cpr_register_iommufd(be, errp)) { + if (be->owned) { + close(be->fd); + be->fd = -1; + } + return false; + } be->users++; trace_iommufd_backend_connect(be->fd, be->owned, be->users); @@ -96,9 +149,13 @@ void iommufd_backend_disconnect(IOMMUFDBackend *be) goto out; } be->users--; - if (!be->users && be->owned) { - close(be->fd); - be->fd = -1; + if (!be->users) { + vfio_iommufd_cpr_unregister_iommufd(be); + if (be->owned) { + cpr_delete_fd(iommufd_fd_name(be), 0); + close(be->fd); + be->fd = -1; + } } out: trace_iommufd_backend_disconnect(be->fd, be->users); @@ -172,6 +229,44 @@ int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, return ret; } +int iommufd_backend_map_file_dma(IOMMUFDBackend *be, uint32_t ioas_id, + hwaddr iova, ram_addr_t size, + int mfd, unsigned long start, bool readonly) +{ + int ret, fd = be->fd; + struct iommu_ioas_map_file map = { + .size = sizeof(map), + .flags = IOMMU_IOAS_MAP_READABLE | + IOMMU_IOAS_MAP_FIXED_IOVA, + .ioas_id = ioas_id, + .fd = mfd, + .start = start, + .iova = iova, + .length = size, + }; + + if (cpr_is_incoming()) { + return 0; + } + + if (!readonly) { + map.flags |= IOMMU_IOAS_MAP_WRITEABLE; + } + + ret = ioctl(fd, IOMMU_IOAS_MAP_FILE, &map); + trace_iommufd_backend_map_file_dma(fd, ioas_id, iova, size, mfd, start, + readonly, ret); + if (ret) { + ret = -errno; + + /* TODO: Not support mapping hardware PCI BAR region for now. */ + if (errno == EFAULT) { + warn_report("IOMMU_IOAS_MAP_FILE failed: %m, PCI BAR?"); + } + } + return ret; +} + int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, ram_addr_t size) { @@ -183,6 +278,10 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, .length = size, }; + if (cpr_is_incoming()) { + return 0; + } + ret = ioctl(fd, IOMMU_IOAS_UNMAP, &unmap); /* * IOMMUFD takes mapping as some kind of object, unmapping diff --git a/backends/trace-events b/backends/trace-events index 7278214..56132d3 100644 --- a/backends/trace-events +++ b/backends/trace-events @@ -7,10 +7,12 @@ dbus_vmstate_loading(const char *id) "id: %s" dbus_vmstate_saving(const char *id) "id: %s" # iommufd.c +iommufd_change_process(int fd, bool ret) "fd=%d (%d)" iommufd_backend_connect(int fd, bool owned, uint32_t users) "fd=%d owned=%d users=%d" iommufd_backend_disconnect(int fd, uint32_t users) "fd=%d users=%d" iommu_backend_set_fd(int fd) "pre-opened /dev/iommu fd=%d" iommufd_backend_map_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, void *vaddr, bool readonly, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" addr=%p readonly=%d (%d)" +iommufd_backend_map_file_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int fd, unsigned long start, bool readonly, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" fd=%d start=%ld readonly=%d (%d)" iommufd_backend_unmap_dma_non_exist(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " Unmap nonexistent mapping: iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)" iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)" iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas) " iommufd=%d ioas=%d" diff --git a/bsd-user/main.c b/bsd-user/main.c index 7c0a059..d0cc8e0 100644 --- a/bsd-user/main.c +++ b/bsd-user/main.c @@ -474,7 +474,7 @@ int main(int argc, char **argv) opt_one_insn_per_tb, &error_abort); object_property_set_int(OBJECT(accel), "tb-size", opt_tb_size, &error_abort); - ac->init_machine(NULL); + ac->init_machine(accel, NULL); } /* diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst index 4203713..5a3ed71 100644 --- a/docs/about/deprecated.rst +++ b/docs/about/deprecated.rst @@ -315,6 +315,14 @@ deprecated; use the new name ``dtb-randomness`` instead. The new name better reflects the way this property affects all random data within the device tree blob, not just the ``kaslr-seed`` node. +Arm ``ast2700a0-evb`` machine (since 10.1) +'''''''''''''''''''''''''''''''''''''''''' + +The ``ast2700a0-evb`` machine represents the first revision of the AST2700 +and serves as the initial engineering sample rather than a production version. +A newer revision, A1, is now supported, and the ``ast2700a1-evb`` should +replace the older A0 version. + Mips ``mipssim`` machine (since 10.0) ''''''''''''''''''''''''''''''''''''' diff --git a/docs/devel/migration/CPR.rst b/docs/devel/migration/CPR.rst index 7897873..0a0fd4f 100644 --- a/docs/devel/migration/CPR.rst +++ b/docs/devel/migration/CPR.rst @@ -152,8 +152,7 @@ cpr-transfer mode This mode allows the user to transfer a guest to a new QEMU instance on the same host with minimal guest pause time, by preserving guest RAM in place, albeit with new virtual addresses in new QEMU. Devices -and their pinned memory pages will also be preserved in a future QEMU -release. +and their pinned memory pages are also preserved for VFIO and IOMMUFD. The user starts new QEMU on the same host as old QEMU, with command- line arguments to create the same machine, plus the ``-incoming`` @@ -322,6 +321,6 @@ Futures cpr-transfer mode is based on a capability to transfer open file descriptors from old to new QEMU. In the future, descriptors for -vfio, iommufd, vhost, and char devices could be transferred, +vhost, and char devices could be transferred, preserving those devices and their kernel state without interruption, even if they do not explicitly support live migration. diff --git a/docs/system/arm/aspeed.rst b/docs/system/arm/aspeed.rst index 43d27d8..bec0a1d 100644 --- a/docs/system/arm/aspeed.rst +++ b/docs/system/arm/aspeed.rst @@ -1,5 +1,4 @@ -Aspeed family boards (``ast2500-evb``, ``ast2600-evb``, ``ast2700-evb``, ``bletchley-bmc``, ``fuji-bmc``, ``fby35-bmc``, ``fp5280g2-bmc``, ``g220a-bmc``, ``palmetto-bmc``, ``qcom-dc-scm-v1-bmc``, ``qcom-firework-bmc``, ``quanta-q71l-bmc``, ``rainier-bmc``, ``romulus-bmc``, ``sonorapass-bmc``, ``supermicrox11-bmc``, ``supermicrox11spi-bmc``, ``tiogapass-bmc``, ``witherspoon-bmc``, ``yosemitev2-bmc``) -================================================================================================================================================================================================================================================================================================================================================================================================================================= +Aspeed family boards (``ast2500-evb``, ``ast2600-evb``, ``ast2700-evb``, ``bletchley-bmc``, ``fuji-bmc``, ``gb200nvl-bmc``, ``fby35-bmc``, ``fp5280g2-bmc``, ``g220a-bmc``, ``palmetto-bmc``, ``qcom-dc-scm-v1-bmc``, ``qcom-firework-bmc``, ``quanta-q71l-bmc``, ``rainier-bmc``, ``romulus-bmc``, ``sonorapass-bmc``, ``supermicrox11-bmc``, ``supermicrox11spi-bmc``, ``tiogapass-bmc``, ``witherspoon-bmc``, ``yosemitev2-bmc``) The QEMU Aspeed machines model BMCs of various OpenPOWER systems and Aspeed evaluation boards. They are based on different releases of the @@ -35,6 +34,7 @@ AST2600 SoC based machines : - ``fuji-bmc`` Facebook Fuji BMC - ``bletchley-bmc`` Facebook Bletchley BMC - ``fby35-bmc`` Facebook fby35 BMC +- ``gb200nvl-bmc`` Nvidia GB200nvl BMC - ``qcom-dc-scm-v1-bmc`` Qualcomm DC-SCM V1 BMC - ``qcom-firework-bmc`` Qualcomm Firework BMC diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx index 639a450..d797922 100644 --- a/hmp-commands-info.hx +++ b/hmp-commands-info.hx @@ -256,20 +256,6 @@ SRST Show dynamic compiler info. ERST -#if defined(CONFIG_TCG) - { - .name = "opcount", - .args_type = "", - .params = "", - .help = "show dynamic compiler opcode counters", - }, -#endif - -SRST - ``info opcount`` - Show dynamic compiler opcode counters -ERST - { .name = "sync-profile", .args_type = "mean:-m,no_coalesce:-n,max:i?", diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig index f543d94..6ea8653 100644 --- a/hw/arm/Kconfig +++ b/hw/arm/Kconfig @@ -532,6 +532,7 @@ config ASPEED_SOC select I2C select DPS310 select PCA9552 + select PCA9554 select SERIAL_MM select SMBUS_EEPROM select PCA954X diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c index d0b3336..c31bbe7 100644 --- a/hw/arm/aspeed.c +++ b/hw/arm/aspeed.c @@ -19,6 +19,7 @@ #include "hw/i2c/i2c_mux_pca954x.h" #include "hw/i2c/smbus_eeprom.h" #include "hw/gpio/pca9552.h" +#include "hw/gpio/pca9554.h" #include "hw/nvram/eeprom_at24c.h" #include "hw/sensor/tmp105.h" #include "hw/misc/led.h" @@ -197,9 +198,12 @@ struct AspeedMachineState { #define FUJI_BMC_HW_STRAP2 0x00000000 /* Bletchley hardware value */ -/* TODO: Leave same as EVB for now. */ -#define BLETCHLEY_BMC_HW_STRAP1 AST2600_EVB_HW_STRAP1 -#define BLETCHLEY_BMC_HW_STRAP2 AST2600_EVB_HW_STRAP2 +#define BLETCHLEY_BMC_HW_STRAP1 0x00002000 +#define BLETCHLEY_BMC_HW_STRAP2 0x00000801 + +/* GB200NVL hardware value */ +#define GB200NVL_BMC_HW_STRAP1 AST2600_EVB_HW_STRAP1 +#define GB200NVL_BMC_HW_STRAP2 AST2600_EVB_HW_STRAP2 /* Qualcomm DC-SCM hardware value */ #define QCOM_DC_SCM_V1_BMC_HW_STRAP1 0x00000000 @@ -465,6 +469,8 @@ static void aspeed_machine_init(MachineState *machine) aspeed_board_init_flashes(&bmc->soc->spi[0], bmc->spi_model ? bmc->spi_model : amc->spi_model, 1, amc->num_cs); + aspeed_board_init_flashes(&bmc->soc->spi[1], + amc->spi2_model, 1, amc->num_cs2); } if (machine->kernel_filename && sc->num_cpus > 1) { @@ -645,6 +651,12 @@ static void create_pca9552(AspeedSoCState *soc, int bus_id, int addr) TYPE_PCA9552, addr); } +static I2CSlave *create_pca9554(AspeedSoCState *soc, int bus_id, int addr) +{ + return i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, bus_id), + TYPE_PCA9554, addr); +} + static void sonorapass_bmc_i2c_init(AspeedMachineState *bmc) { AspeedSoCState *soc = bmc->soc; @@ -1003,6 +1015,180 @@ static void fuji_bmc_i2c_init(AspeedMachineState *bmc) } #define TYPE_TMP421 "tmp421" +#define TYPE_DS1338 "ds1338" + +/* Catalina hardware value */ +#define CATALINA_BMC_HW_STRAP1 0x00002002 +#define CATALINA_BMC_HW_STRAP2 0x00000800 + +#define CATALINA_BMC_RAM_SIZE ASPEED_RAM_SIZE(2 * GiB) + +static void catalina_bmc_i2c_init(AspeedMachineState *bmc) +{ + /* Reference from v6.16-rc2 aspeed-bmc-facebook-catalina.dts */ + + AspeedSoCState *soc = bmc->soc; + I2CBus *i2c[16] = {}; + I2CSlave *i2c_mux; + + /* busses 0-15 are all used. */ + for (int i = 0; i < ARRAY_SIZE(i2c); i++) { + i2c[i] = aspeed_i2c_get_bus(&soc->i2c, i); + } + + /* &i2c0 */ + /* i2c-mux@71 (PCA9546) on i2c0 */ + i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x71); + + /* i2c-mux@72 (PCA9546) on i2c0 */ + i2c_mux = i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x72); + + /* i2c0mux1ch1 */ + /* io_expander7 - pca9535@20 */ + i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 1), + TYPE_PCA9552, 0x20); + /* eeprom@50 */ + at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 1), 0x50, 8 * KiB); + + /* i2c-mux@73 (PCA9546) on i2c0 */ + i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x73); + + /* i2c-mux@75 (PCA9546) on i2c0 */ + i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x75); + + /* i2c-mux@76 (PCA9546) on i2c0 */ + i2c_mux = i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x76); + + /* i2c0mux4ch1 */ + /* io_expander8 - pca9535@21 */ + i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 1), + TYPE_PCA9552, 0x21); + /* eeprom@50 */ + at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 1), 0x50, 8 * KiB); + + /* i2c-mux@77 (PCA9546) on i2c0 */ + i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x77); + + + /* &i2c1 */ + /* i2c-mux@70 (PCA9548) on i2c1 */ + i2c_mux = i2c_slave_create_simple(i2c[1], TYPE_PCA9548, 0x70); + /* i2c1mux0ch0 */ + /* ina238@41 - no model */ + /* ina238@42 - no model */ + /* ina238@44 - no model */ + /* i2c1mux0ch1 */ + /* ina238@41 - no model */ + /* ina238@43 - no model */ + /* i2c1mux0ch4 */ + /* ltc4287@42 - no model */ + /* ltc4287@43 - no model */ + + /* i2c1mux0ch5 */ + /* eeprom@54 */ + at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 5), 0x54, 8 * KiB); + /* tpm75@4f */ + i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 5), TYPE_TMP75, 0x4f); + + /* i2c1mux0ch6 */ + /* io_expander5 - pca9554@27 */ + i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 6), + TYPE_PCA9554, 0x27); + /* io_expander6 - pca9555@25 */ + i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 6), + TYPE_PCA9552, 0x25); + /* eeprom@51 */ + at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 6), 0x51, 8 * KiB); + + /* i2c1mux0ch7 */ + /* eeprom@53 */ + at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 7), 0x53, 8 * KiB); + /* temperature-sensor@4b - tmp75 */ + i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 7), TYPE_TMP75, 0x4b); + + /* &i2c2 */ + /* io_expander0 - pca9555@20 */ + i2c_slave_create_simple(i2c[2], TYPE_PCA9552, 0x20); + /* io_expander0 - pca9555@21 */ + i2c_slave_create_simple(i2c[2], TYPE_PCA9552, 0x21); + /* io_expander0 - pca9555@27 */ + i2c_slave_create_simple(i2c[2], TYPE_PCA9552, 0x27); + /* eeprom@50 */ + at24c_eeprom_init(i2c[2], 0x50, 8 * KiB); + /* eeprom@51 */ + at24c_eeprom_init(i2c[2], 0x51, 8 * KiB); + + /* &i2c5 */ + /* i2c-mux@70 (PCA9548) on i2c5 */ + i2c_mux = i2c_slave_create_simple(i2c[5], TYPE_PCA9548, 0x70); + /* i2c5mux0ch6 */ + /* eeprom@52 */ + at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 6), 0x52, 8 * KiB); + /* i2c5mux0ch7 */ + /* ina230@40 - no model */ + /* ina230@41 - no model */ + /* ina230@44 - no model */ + /* ina230@45 - no model */ + + /* &i2c6 */ + /* io_expander3 - pca9555@21 */ + i2c_slave_create_simple(i2c[6], TYPE_PCA9552, 0x21); + /* rtc@6f - nct3018y */ + i2c_slave_create_simple(i2c[6], TYPE_DS1338, 0x6f); + + /* &i2c9 */ + /* io_expander4 - pca9555@4f */ + i2c_slave_create_simple(i2c[9], TYPE_PCA9552, 0x4f); + /* temperature-sensor@4b - tpm75 */ + i2c_slave_create_simple(i2c[9], TYPE_TMP75, 0x4b); + /* eeprom@50 */ + at24c_eeprom_init(i2c[9], 0x50, 8 * KiB); + /* eeprom@56 */ + at24c_eeprom_init(i2c[9], 0x56, 8 * KiB); + + /* &i2c10 */ + /* temperature-sensor@1f - tpm421 */ + i2c_slave_create_simple(i2c[10], TYPE_TMP421, 0x1f); + /* eeprom@50 */ + at24c_eeprom_init(i2c[10], 0x50, 8 * KiB); + + /* &i2c11 */ + /* ssif-bmc@10 - no model */ + + /* &i2c12 */ + /* eeprom@50 */ + at24c_eeprom_init(i2c[12], 0x50, 8 * KiB); + + /* &i2c13 */ + /* eeprom@50 */ + at24c_eeprom_init(i2c[13], 0x50, 8 * KiB); + /* eeprom@54 */ + at24c_eeprom_init(i2c[13], 0x54, 256); + /* eeprom@55 */ + at24c_eeprom_init(i2c[13], 0x55, 256); + /* eeprom@57 */ + at24c_eeprom_init(i2c[13], 0x57, 256); + + /* &i2c14 */ + /* io_expander9 - pca9555@10 */ + i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x10); + /* io_expander10 - pca9555@11 */ + i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x11); + /* io_expander11 - pca9555@12 */ + i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x12); + /* io_expander12 - pca9555@13 */ + i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x13); + /* io_expander13 - pca9555@14 */ + i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x14); + /* io_expander14 - pca9555@15 */ + i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x15); + + /* &i2c15 */ + /* temperature-sensor@1f - tmp421 */ + i2c_slave_create_simple(i2c[15], TYPE_TMP421, 0x1f); + /* eeprom@52 */ + at24c_eeprom_init(i2c[15], 0x52, 8 * KiB); +} static void bletchley_bmc_i2c_init(AspeedMachineState *bmc) { @@ -1050,6 +1236,45 @@ static void bletchley_bmc_i2c_init(AspeedMachineState *bmc) i2c_slave_create_simple(i2c[12], TYPE_PCA9552, 0x67); } + +static void gb200nvl_bmc_i2c_init(AspeedMachineState *bmc) +{ + AspeedSoCState *soc = bmc->soc; + I2CBus *i2c[15] = {}; + DeviceState *dev; + for (int i = 0; i < sizeof(i2c) / sizeof(i2c[0]); i++) { + if ((i == 11) || (i == 12) || (i == 13)) { + continue; + } + i2c[i] = aspeed_i2c_get_bus(&soc->i2c, i); + } + + /* Bus 5 Expander */ + create_pca9554(soc, 4, 0x21); + + /* Mux I2c Expanders */ + i2c_slave_create_simple(i2c[5], "pca9546", 0x71); + i2c_slave_create_simple(i2c[5], "pca9546", 0x72); + i2c_slave_create_simple(i2c[5], "pca9546", 0x73); + i2c_slave_create_simple(i2c[5], "pca9546", 0x75); + i2c_slave_create_simple(i2c[5], "pca9546", 0x76); + i2c_slave_create_simple(i2c[5], "pca9546", 0x77); + + /* Bus 10 */ + dev = DEVICE(create_pca9554(soc, 9, 0x20)); + + /* Set FPGA_READY */ + object_property_set_str(OBJECT(dev), "pin1", "high", &error_fatal); + + create_pca9554(soc, 9, 0x21); + at24c_eeprom_init(i2c[9], 0x50, 64 * KiB); + at24c_eeprom_init(i2c[9], 0x51, 64 * KiB); + + /* Bus 11 */ + at24c_eeprom_init_rom(i2c[10], 0x50, 256, gb200nvl_bmc_fruid, + gb200nvl_bmc_fruid_len); +} + static void fby35_i2c_init(AspeedMachineState *bmc) { AspeedSoCState *soc = bmc->soc; @@ -1585,6 +1810,52 @@ static void aspeed_machine_bletchley_class_init(ObjectClass *oc, aspeed_machine_class_init_cpus_defaults(mc); } +static void aspeed_machine_catalina_class_init(ObjectClass *oc, + const void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc); + + mc->desc = "Facebook Catalina BMC (Cortex-A7)"; + amc->soc_name = "ast2600-a3"; + amc->hw_strap1 = CATALINA_BMC_HW_STRAP1; + amc->hw_strap2 = CATALINA_BMC_HW_STRAP2; + amc->fmc_model = "w25q01jvq"; + amc->spi_model = NULL; + amc->num_cs = 2; + amc->macs_mask = ASPEED_MAC2_ON; + amc->i2c_init = catalina_bmc_i2c_init; + mc->auto_create_sdcard = true; + mc->default_ram_size = CATALINA_BMC_RAM_SIZE; + aspeed_machine_class_init_cpus_defaults(mc); + aspeed_machine_ast2600_class_emmc_init(oc); +} + +#define GB200NVL_BMC_RAM_SIZE ASPEED_RAM_SIZE(1 * GiB) + +static void aspeed_machine_gb200nvl_class_init(ObjectClass *oc, + const void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc); + + mc->desc = "Nvidia GB200NVL BMC (Cortex-A7)"; + amc->soc_name = "ast2600-a3"; + amc->hw_strap1 = GB200NVL_BMC_HW_STRAP1; + amc->hw_strap2 = GB200NVL_BMC_HW_STRAP2; + amc->fmc_model = "mx66u51235f"; + amc->spi_model = "mx66u51235f"; + amc->num_cs = 2; + + amc->spi2_model = "mx66u51235f"; + amc->num_cs2 = 1; + amc->macs_mask = ASPEED_MAC0_ON | ASPEED_MAC1_ON; + amc->i2c_init = gb200nvl_bmc_i2c_init; + mc->default_ram_size = GB200NVL_BMC_RAM_SIZE; + aspeed_machine_class_init_cpus_defaults(mc); + aspeed_machine_ast2600_class_emmc_init(oc); +} + static void fby35_reset(MachineState *state, ResetType type) { AspeedMachineState *bmc = ASPEED_MACHINE(state); @@ -1878,6 +2149,14 @@ static const TypeInfo aspeed_machine_types[] = { .parent = TYPE_ASPEED_MACHINE, .class_init = aspeed_machine_bletchley_class_init, }, { + .name = MACHINE_TYPE_NAME("gb200nvl-bmc"), + .parent = TYPE_ASPEED_MACHINE, + .class_init = aspeed_machine_gb200nvl_class_init, + }, { + .name = MACHINE_TYPE_NAME("catalina-bmc"), + .parent = TYPE_ASPEED_MACHINE, + .class_init = aspeed_machine_catalina_class_init, + }, { .name = MACHINE_TYPE_NAME("fby35-bmc"), .parent = MACHINE_TYPE_NAME("ast2600-evb"), .class_init = aspeed_machine_fby35_class_init, diff --git a/hw/arm/aspeed_eeprom.c b/hw/arm/aspeed_eeprom.c index daa3d32..8bbbdec 100644 --- a/hw/arm/aspeed_eeprom.c +++ b/hw/arm/aspeed_eeprom.c @@ -162,6 +162,25 @@ const uint8_t rainier_bmc_fruid[] = { 0x31, 0x50, 0x46, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, }; +const uint8_t gb200nvl_bmc_fruid[] = { + 0x01, 0x00, 0x00, 0x01, 0x0b, 0x00, 0x00, 0xf3, 0x01, 0x0a, 0x19, 0x1f, + 0x0f, 0xe6, 0xc6, 0x4e, 0x56, 0x49, 0x44, 0x49, 0x41, 0xc5, 0x50, 0x33, + 0x38, 0x30, 0x39, 0xcd, 0x31, 0x35, 0x38, 0x33, 0x33, 0x32, 0x34, 0x38, + 0x30, 0x30, 0x31, 0x35, 0x30, 0xd2, 0x36, 0x39, 0x39, 0x2d, 0x31, 0x33, + 0x38, 0x30, 0x39, 0x2d, 0x30, 0x34, 0x30, 0x34, 0x2d, 0x36, 0x30, 0x30, + 0xc0, 0x01, 0x01, 0xd6, 0x4d, 0x41, 0x43, 0x3a, 0x20, 0x33, 0x43, 0x3a, + 0x36, 0x44, 0x3a, 0x36, 0x36, 0x3a, 0x31, 0x34, 0x3a, 0x43, 0x38, 0x3a, + 0x37, 0x41, 0xc1, 0x3b, 0x01, 0x09, 0x19, 0xc6, 0x4e, 0x56, 0x49, 0x44, + 0x49, 0x41, 0xc9, 0x50, 0x33, 0x38, 0x30, 0x39, 0x2d, 0x42, 0x4d, 0x43, + 0xd2, 0x36, 0x39, 0x39, 0x2d, 0x31, 0x33, 0x38, 0x30, 0x39, 0x2d, 0x30, + 0x34, 0x30, 0x34, 0x2d, 0x36, 0x30, 0x30, 0xc4, 0x41, 0x45, 0x2e, 0x31, + 0xcd, 0x31, 0x35, 0x38, 0x33, 0x33, 0x32, 0x34, 0x38, 0x30, 0x30, 0x31, + 0x35, 0x30, 0xc0, 0xc4, 0x76, 0x30, 0x2e, 0x31, 0xc1, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xb4, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + +}; + const size_t tiogapass_bmc_fruid_len = sizeof(tiogapass_bmc_fruid); const size_t fby35_nic_fruid_len = sizeof(fby35_nic_fruid); const size_t fby35_bb_fruid_len = sizeof(fby35_bb_fruid); @@ -169,3 +188,5 @@ const size_t fby35_bmc_fruid_len = sizeof(fby35_bmc_fruid); const size_t yosemitev2_bmc_fruid_len = sizeof(yosemitev2_bmc_fruid); const size_t rainier_bb_fruid_len = sizeof(rainier_bb_fruid); const size_t rainier_bmc_fruid_len = sizeof(rainier_bmc_fruid); +const size_t gb200nvl_bmc_fruid_len = sizeof(gb200nvl_bmc_fruid); + diff --git a/hw/arm/aspeed_eeprom.h b/hw/arm/aspeed_eeprom.h index f08c16e..3ed9bc1 100644 --- a/hw/arm/aspeed_eeprom.h +++ b/hw/arm/aspeed_eeprom.h @@ -26,4 +26,7 @@ extern const size_t rainier_bb_fruid_len; extern const uint8_t rainier_bmc_fruid[]; extern const size_t rainier_bmc_fruid_len; +extern const uint8_t gb200nvl_bmc_fruid[]; +extern const size_t gb200nvl_bmc_fruid_len; + #endif diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig index eb65bda..14d23e2 100644 --- a/hw/i386/Kconfig +++ b/hw/i386/Kconfig @@ -13,7 +13,7 @@ config SGX config TDX bool select X86_FW_OVMF - depends on KVM + depends on KVM && X86_64 config PC bool diff --git a/hw/misc/aspeed_scu.c b/hw/misc/aspeed_scu.c index 4930e00..a0ab5ee 100644 --- a/hw/misc/aspeed_scu.c +++ b/hw/misc/aspeed_scu.c @@ -91,6 +91,7 @@ #define BMC_DEV_ID TO_REG(0x1A4) #define AST2600_PROT_KEY TO_REG(0x00) +#define AST2600_PROT_KEY2 TO_REG(0x10) #define AST2600_SILICON_REV TO_REG(0x04) #define AST2600_SILICON_REV2 TO_REG(0x14) #define AST2600_SYS_RST_CTRL TO_REG(0x40) @@ -176,6 +177,7 @@ #define AST2700_SCUIO_UARTCLK_GEN TO_REG(0x330) #define AST2700_SCUIO_HUARTCLK_GEN TO_REG(0x334) #define AST2700_SCUIO_CLK_DUTY_MEAS_RST TO_REG(0x388) +#define AST2700_SCUIO_FREQ_CNT_CTL TO_REG(0x3A0) #define SCU_IO_REGION_SIZE 0x1000 @@ -722,6 +724,8 @@ static void aspeed_ast2600_scu_write(void *opaque, hwaddr offset, int reg = TO_REG(offset); /* Truncate here so bitwise operations below behave as expected */ uint32_t data = data64; + bool prot_data_state = data == ASPEED_SCU_PROT_KEY; + bool unlocked = s->regs[AST2600_PROT_KEY] && s->regs[AST2600_PROT_KEY2]; if (reg >= ASPEED_AST2600_SCU_NR_REGS) { qemu_log_mask(LOG_GUEST_ERROR, @@ -730,15 +734,24 @@ static void aspeed_ast2600_scu_write(void *opaque, hwaddr offset, return; } - if (reg > PROT_KEY && !s->regs[PROT_KEY]) { + if ((reg != AST2600_PROT_KEY && reg != AST2600_PROT_KEY2) && !unlocked) { qemu_log_mask(LOG_GUEST_ERROR, "%s: SCU is locked!\n", __func__); + return; } trace_aspeed_scu_write(offset, size, data); switch (reg) { case AST2600_PROT_KEY: - s->regs[reg] = (data == ASPEED_SCU_PROT_KEY) ? 1 : 0; + /* + * Writing a value to SCU000 will modify both protection + * registers to each protection register individually. + */ + s->regs[AST2600_PROT_KEY] = prot_data_state; + s->regs[AST2600_PROT_KEY2] = prot_data_state; + return; + case AST2600_PROT_KEY2: + s->regs[AST2600_PROT_KEY2] = prot_data_state; return; case AST2600_HW_STRAP1: case AST2600_HW_STRAP2: @@ -1022,6 +1035,10 @@ static void aspeed_ast2700_scuio_write(void *opaque, hwaddr offset, s->regs[reg - 1] ^= data; updated = true; break; + case AST2700_SCUIO_FREQ_CNT_CTL: + s->regs[reg] = deposit32(s->regs[reg], 6, 1, !!(data & BIT(1))); + updated = true; + break; default: qemu_log_mask(LOG_GUEST_ERROR, "%s: Unhandled write at offset 0x%" HWADDR_PRIx "\n", @@ -1066,6 +1083,7 @@ static const uint32_t ast2700_a0_resets_io[ASPEED_AST2700_SCU_NR_REGS] = { [AST2700_SCUIO_UARTCLK_GEN] = 0x00014506, [AST2700_SCUIO_HUARTCLK_GEN] = 0x000145c0, [AST2700_SCUIO_CLK_DUTY_MEAS_RST] = 0x0c9100d2, + [AST2700_SCUIO_FREQ_CNT_CTL] = 0x00000080, }; static void aspeed_2700_scuio_class_init(ObjectClass *klass, const void *data) diff --git a/hw/misc/aspeed_sdmc.c b/hw/misc/aspeed_sdmc.c index f04d993..dff7cc3 100644 --- a/hw/misc/aspeed_sdmc.c +++ b/hw/misc/aspeed_sdmc.c @@ -570,6 +570,9 @@ static void aspeed_2700_sdmc_reset(DeviceState *dev) /* Set ram size bit and defaults values */ s->regs[R_MAIN_CONF] = asc->compute_conf(s, 0); + /* Skipping dram init */ + s->regs[R_MAIN_CONTROL] = BIT(16); + if (s->unlocked) { s->regs[R_2700_PROT] = PROT_UNLOCKED; } diff --git a/hw/vfio-user/container.c b/hw/vfio-user/container.c index 3133fef..d318e6a 100644 --- a/hw/vfio-user/container.c +++ b/hw/vfio-user/container.c @@ -13,7 +13,6 @@ #include "hw/vfio-user/container.h" #include "hw/vfio-user/device.h" #include "hw/vfio-user/trace.h" -#include "hw/vfio/vfio-cpr.h" #include "hw/vfio/vfio-device.h" #include "hw/vfio/vfio-listener.h" #include "qapi/error.h" @@ -225,14 +224,10 @@ vfio_user_container_connect(AddressSpace *as, VFIODevice *vbasedev, bcontainer = &container->bcontainer; - if (!vfio_cpr_register_container(bcontainer, errp)) { - goto free_container_exit; - } - ret = ram_block_uncoordinated_discard_disable(true); if (ret) { error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken"); - goto unregister_container_exit; + goto free_container_exit; } vioc = VFIO_IOMMU_GET_CLASS(bcontainer); @@ -261,9 +256,6 @@ listener_release_exit: enable_discards_exit: ram_block_uncoordinated_discard_disable(false); -unregister_container_exit: - vfio_cpr_unregister_container(bcontainer); - free_container_exit: object_unref(container); @@ -286,7 +278,6 @@ static void vfio_user_container_disconnect(VFIOUserContainer *container) vioc->release(bcontainer); } - vfio_cpr_unregister_container(bcontainer); object_unref(container); vfio_address_space_put(space); diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c index 1df4438..7719f24 100644 --- a/hw/vfio/ap.c +++ b/hw/vfio/ap.c @@ -265,7 +265,7 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp) error: error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name); - g_free(vbasedev->name); + vfio_device_free_name(vbasedev); } static void vfio_ap_unrealize(DeviceState *dev) @@ -275,7 +275,7 @@ static void vfio_ap_unrealize(DeviceState *dev) vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_REQ_IRQ_INDEX); vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_CFG_CHG_IRQ_INDEX); vfio_device_detach(&vapdev->vdev); - g_free(vapdev->vdev.name); + vfio_device_free_name(&vapdev->vdev); } static const Property vfio_ap_properties[] = { diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c index cea9d6e..9560b8d 100644 --- a/hw/vfio/ccw.c +++ b/hw/vfio/ccw.c @@ -619,7 +619,7 @@ out_io_notifier_err: out_region_err: vfio_device_detach(vbasedev); out_attach_dev_err: - g_free(vbasedev->name); + vfio_device_free_name(vbasedev); out_unrealize: if (cdc->unrealize) { cdc->unrealize(cdev); @@ -637,7 +637,7 @@ static void vfio_ccw_unrealize(DeviceState *dev) vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX); vfio_ccw_put_region(vcdev); vfio_device_detach(&vcdev->vdev); - g_free(vcdev->vdev.name); + vfio_device_free_name(&vcdev->vdev); if (cdc->unrealize) { cdc->unrealize(cdev); diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c index d834bd4..5630497 100644 --- a/hw/vfio/container-base.c +++ b/hw/vfio/container-base.c @@ -78,7 +78,16 @@ int vfio_container_dma_map(VFIOContainerBase *bcontainer, void *vaddr, bool readonly, MemoryRegion *mr) { VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + RAMBlock *rb = mr->ram_block; + int mfd = rb ? qemu_ram_get_fd(rb) : -1; + if (mfd >= 0 && vioc->dma_map_file) { + unsigned long start = vaddr - qemu_ram_get_host_addr(rb); + unsigned long offset = qemu_ram_get_fd_offset(rb); + + return vioc->dma_map_file(bcontainer, iova, size, mfd, start + offset, + readonly); + } g_assert(vioc->dma_map); return vioc->dma_map(bcontainer, iova, size, vaddr, readonly, mr); } diff --git a/hw/vfio/cpr-iommufd.c b/hw/vfio/cpr-iommufd.c new file mode 100644 index 0000000..148a06d --- /dev/null +++ b/hw/vfio/cpr-iommufd.c @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2024-2025 Oracle and/or its affiliates. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "hw/vfio/vfio-cpr.h" +#include "hw/vfio/vfio-device.h" +#include "migration/blocker.h" +#include "migration/cpr.h" +#include "migration/migration.h" +#include "migration/vmstate.h" +#include "system/iommufd.h" +#include "vfio-iommufd.h" +#include "trace.h" + +typedef struct CprVFIODevice { + char *name; + unsigned int namelen; + uint32_t ioas_id; + int devid; + uint32_t hwpt_id; + QLIST_ENTRY(CprVFIODevice) next; +} CprVFIODevice; + +static const VMStateDescription vmstate_cpr_vfio_device = { + .name = "cpr vfio device", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(namelen, CprVFIODevice), + VMSTATE_VBUFFER_ALLOC_UINT32(name, CprVFIODevice, 0, NULL, namelen), + VMSTATE_INT32(devid, CprVFIODevice), + VMSTATE_UINT32(ioas_id, CprVFIODevice), + VMSTATE_UINT32(hwpt_id, CprVFIODevice), + VMSTATE_END_OF_LIST() + } +}; + +const VMStateDescription vmstate_cpr_vfio_devices = { + .name = CPR_STATE "/vfio devices", + .version_id = 1, + .minimum_version_id = 1, + .fields = (const VMStateField[]){ + VMSTATE_QLIST_V(vfio_devices, CprState, 1, vmstate_cpr_vfio_device, + CprVFIODevice, next), + VMSTATE_END_OF_LIST() + } +}; + +static void vfio_cpr_save_device(VFIODevice *vbasedev) +{ + CprVFIODevice *elem = g_new0(CprVFIODevice, 1); + + elem->name = g_strdup(vbasedev->name); + elem->namelen = strlen(vbasedev->name) + 1; + elem->ioas_id = vbasedev->cpr.ioas_id; + elem->devid = vbasedev->devid; + elem->hwpt_id = vbasedev->cpr.hwpt_id; + QLIST_INSERT_HEAD(&cpr_state.vfio_devices, elem, next); +} + +static CprVFIODevice *find_device(const char *name) +{ + CprVFIODeviceList *head = &cpr_state.vfio_devices; + CprVFIODevice *elem; + + QLIST_FOREACH(elem, head, next) { + if (!strcmp(elem->name, name)) { + return elem; + } + } + return NULL; +} + +static void vfio_cpr_delete_device(const char *name) +{ + CprVFIODevice *elem = find_device(name); + + if (elem) { + QLIST_REMOVE(elem, next); + g_free(elem->name); + g_free(elem); + } +} + +static bool vfio_cpr_find_device(VFIODevice *vbasedev) +{ + CprVFIODevice *elem = find_device(vbasedev->name); + + if (elem) { + vbasedev->cpr.ioas_id = elem->ioas_id; + vbasedev->devid = elem->devid; + vbasedev->cpr.hwpt_id = elem->hwpt_id; + trace_vfio_cpr_find_device(elem->ioas_id, elem->devid, elem->hwpt_id); + return true; + } + return false; +} + +static bool vfio_cpr_supported(IOMMUFDBackend *be, Error **errp) +{ + if (!iommufd_change_process_capable(be)) { + if (errp) { + error_setg(errp, "vfio iommufd backend does not support " + "IOMMU_IOAS_CHANGE_PROCESS"); + } + return false; + } + return true; +} + +static int iommufd_cpr_pre_save(void *opaque) +{ + IOMMUFDBackend *be = opaque; + + /* + * The process has not changed yet, but proactively try the ioctl, + * and it will fail if any DMA mappings are not supported. + */ + if (!iommufd_change_process_capable(be)) { + error_report("some memory regions do not support " + "IOMMU_IOAS_CHANGE_PROCESS"); + return -1; + } + return 0; +} + +static int iommufd_cpr_post_load(void *opaque, int version_id) +{ + IOMMUFDBackend *be = opaque; + Error *local_err = NULL; + + if (!iommufd_change_process(be, &local_err)) { + error_report_err(local_err); + return -1; + } + return 0; +} + +static const VMStateDescription iommufd_cpr_vmstate = { + .name = "iommufd", + .version_id = 0, + .minimum_version_id = 0, + .pre_save = iommufd_cpr_pre_save, + .post_load = iommufd_cpr_post_load, + .needed = cpr_incoming_needed, + .fields = (VMStateField[]) { + VMSTATE_END_OF_LIST() + } +}; + +bool vfio_iommufd_cpr_register_iommufd(IOMMUFDBackend *be, Error **errp) +{ + Error **cpr_blocker = &be->cpr_blocker; + + if (!vfio_cpr_supported(be, cpr_blocker)) { + return migrate_add_blocker_modes(cpr_blocker, errp, + MIG_MODE_CPR_TRANSFER, -1) == 0; + } + + vmstate_register(NULL, -1, &iommufd_cpr_vmstate, be); + + return true; +} + +void vfio_iommufd_cpr_unregister_iommufd(IOMMUFDBackend *be) +{ + vmstate_unregister(NULL, &iommufd_cpr_vmstate, be); + migrate_del_blocker(&be->cpr_blocker); +} + +bool vfio_iommufd_cpr_register_container(VFIOIOMMUFDContainer *container, + Error **errp) +{ + VFIOContainerBase *bcontainer = &container->bcontainer; + + migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier, + vfio_cpr_reboot_notifier, + MIG_MODE_CPR_REBOOT); + + vfio_cpr_add_kvm_notifier(); + + return true; +} + +void vfio_iommufd_cpr_unregister_container(VFIOIOMMUFDContainer *container) +{ + VFIOContainerBase *bcontainer = &container->bcontainer; + + migration_remove_notifier(&bcontainer->cpr_reboot_notifier); +} + +void vfio_iommufd_cpr_register_device(VFIODevice *vbasedev) +{ + if (!cpr_is_incoming()) { + /* + * Beware fd may have already been saved by vfio_device_set_fd, + * so call resave to avoid a duplicate entry. + */ + cpr_resave_fd(vbasedev->name, 0, vbasedev->fd); + vfio_cpr_save_device(vbasedev); + } +} + +void vfio_iommufd_cpr_unregister_device(VFIODevice *vbasedev) +{ + cpr_delete_fd(vbasedev->name, 0); + vfio_cpr_delete_device(vbasedev->name); +} + +void vfio_cpr_load_device(VFIODevice *vbasedev) +{ + if (cpr_is_incoming()) { + bool ret = vfio_cpr_find_device(vbasedev); + g_assert(ret); + + if (vbasedev->fd < 0) { + vbasedev->fd = cpr_find_fd(vbasedev->name, 0); + } + } +} diff --git a/hw/vfio/cpr-legacy.c b/hw/vfio/cpr-legacy.c index a84c324..553b203 100644 --- a/hw/vfio/cpr-legacy.c +++ b/hw/vfio/cpr-legacy.c @@ -99,20 +99,21 @@ static int vfio_container_post_load(void *opaque, int version_id) { VFIOContainer *container = opaque; VFIOContainerBase *bcontainer = &container->bcontainer; - VFIOGroup *group; + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + dma_map_fn saved_dma_map = vioc->dma_map; Error *local_err = NULL; + /* During incoming CPR, divert calls to dma_map. */ + vioc->dma_map = vfio_legacy_cpr_dma_map; + if (!vfio_listener_register(bcontainer, &local_err)) { error_report_err(local_err); return -1; } - QLIST_FOREACH(group, &container->group_list, container_next) { - VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + /* Restore original dma_map function */ + vioc->dma_map = saved_dma_map; - /* Restore original dma_map function */ - vioc->dma_map = container->cpr.saved_dma_map; - } return 0; } @@ -148,6 +149,7 @@ static int vfio_cpr_fail_notifier(NotifierWithReturn *notifier, */ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + dma_map_fn saved_dma_map = vioc->dma_map; vioc->dma_map = vfio_legacy_cpr_dma_map; container->cpr.remap_listener = (MemoryListener) { @@ -158,7 +160,7 @@ static int vfio_cpr_fail_notifier(NotifierWithReturn *notifier, bcontainer->space->as); memory_listener_unregister(&container->cpr.remap_listener); container->cpr.vaddr_unmapped = false; - vioc->dma_map = container->cpr.saved_dma_map; + vioc->dma_map = saved_dma_map; } return 0; } @@ -177,14 +179,9 @@ bool vfio_legacy_cpr_register_container(VFIOContainer *container, Error **errp) MIG_MODE_CPR_TRANSFER, -1) == 0; } - vmstate_register(NULL, -1, &vfio_container_vmstate, container); + vfio_cpr_add_kvm_notifier(); - /* During incoming CPR, divert calls to dma_map. */ - if (cpr_is_incoming()) { - VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); - container->cpr.saved_dma_map = vioc->dma_map; - vioc->dma_map = vfio_legacy_cpr_dma_map; - } + vmstate_register(NULL, -1, &vfio_container_vmstate, container); migration_add_notifier_mode(&container->cpr.transfer_notifier, vfio_cpr_fail_notifier, diff --git a/hw/vfio/cpr.c b/hw/vfio/cpr.c index fdbb58e..af0f12a 100644 --- a/hw/vfio/cpr.c +++ b/hw/vfio/cpr.c @@ -9,6 +9,8 @@ #include "hw/vfio/vfio-device.h" #include "hw/vfio/vfio-cpr.h" #include "hw/vfio/pci.h" +#include "hw/pci/msix.h" +#include "hw/pci/msi.h" #include "migration/cpr.h" #include "qapi/error.h" #include "system/runstate.h" @@ -27,17 +29,67 @@ int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier, return 0; } -bool vfio_cpr_register_container(VFIOContainerBase *bcontainer, Error **errp) +#define STRDUP_VECTOR_FD_NAME(vdev, name) \ + g_strdup_printf("%s_%s", (vdev)->vbasedev.name, (name)) + +void vfio_cpr_save_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr, + int fd) +{ + g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name); + cpr_save_fd(fdname, nr, fd); +} + +int vfio_cpr_load_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr) +{ + g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name); + return cpr_find_fd(fdname, nr); +} + +void vfio_cpr_delete_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr) { - migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier, - vfio_cpr_reboot_notifier, - MIG_MODE_CPR_REBOOT); - return true; + g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name); + cpr_delete_fd(fdname, nr); } -void vfio_cpr_unregister_container(VFIOContainerBase *bcontainer) +static void vfio_cpr_claim_vectors(VFIOPCIDevice *vdev, int nr_vectors, + bool msix) { - migration_remove_notifier(&bcontainer->cpr_reboot_notifier); + int i, fd; + bool pending = false; + PCIDevice *pdev = &vdev->pdev; + + vdev->nr_vectors = nr_vectors; + vdev->msi_vectors = g_new0(VFIOMSIVector, nr_vectors); + vdev->interrupt = msix ? VFIO_INT_MSIX : VFIO_INT_MSI; + + vfio_pci_prepare_kvm_msi_virq_batch(vdev); + + for (i = 0; i < nr_vectors; i++) { + VFIOMSIVector *vector = &vdev->msi_vectors[i]; + + fd = vfio_cpr_load_vector_fd(vdev, "interrupt", i); + if (fd >= 0) { + vfio_pci_vector_init(vdev, i); + vfio_pci_msi_set_handler(vdev, i); + } + + if (vfio_cpr_load_vector_fd(vdev, "kvm_interrupt", i) >= 0) { + vfio_pci_add_kvm_msi_virq(vdev, vector, i, msix); + } else { + vdev->msi_vectors[i].virq = -1; + } + + if (msix && msix_is_pending(pdev, i) && msix_is_masked(pdev, i)) { + set_bit(i, vdev->msix->pending); + pending = true; + } + } + + vfio_pci_commit_kvm_msi_virq_batch(vdev); + + if (msix) { + memory_region_set_enabled(&pdev->msix_pba_mmio, pending); + } } /* @@ -58,13 +110,91 @@ static int vfio_cpr_pci_pre_load(void *opaque) return 0; } +static int vfio_cpr_pci_post_load(void *opaque, int version_id) +{ + VFIOPCIDevice *vdev = opaque; + PCIDevice *pdev = &vdev->pdev; + int nr_vectors; + + if (msix_enabled(pdev)) { + vfio_pci_msix_set_notifiers(vdev); + nr_vectors = vdev->msix->entries; + vfio_cpr_claim_vectors(vdev, nr_vectors, true); + + } else if (msi_enabled(pdev)) { + nr_vectors = msi_nr_vectors_allocated(pdev); + vfio_cpr_claim_vectors(vdev, nr_vectors, false); + + } else if (vfio_pci_read_config(pdev, PCI_INTERRUPT_PIN, 1)) { + Error *local_err = NULL; + if (!vfio_pci_intx_enable(vdev, &local_err)) { + error_report_err(local_err); + return -1; + } + } + + return 0; +} + +static bool pci_msix_present(void *opaque, int version_id) +{ + PCIDevice *pdev = opaque; + + return msix_present(pdev); +} + +static const VMStateDescription vfio_intx_vmstate = { + .name = "vfio-cpr-intx", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_BOOL(pending, VFIOINTx), + VMSTATE_UINT32(route.mode, VFIOINTx), + VMSTATE_INT32(route.irq, VFIOINTx), + VMSTATE_END_OF_LIST() + } +}; + +#define VMSTATE_VFIO_INTX(_field, _state) { \ + .name = (stringify(_field)), \ + .size = sizeof(VFIOINTx), \ + .vmsd = &vfio_intx_vmstate, \ + .flags = VMS_STRUCT, \ + .offset = vmstate_offset_value(_state, _field, VFIOINTx), \ +} + const VMStateDescription vfio_cpr_pci_vmstate = { .name = "vfio-cpr-pci", .version_id = 0, .minimum_version_id = 0, .pre_load = vfio_cpr_pci_pre_load, + .post_load = vfio_cpr_pci_post_load, .needed = cpr_incoming_needed, .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(pdev, VFIOPCIDevice), + VMSTATE_MSIX_TEST(pdev, VFIOPCIDevice, pci_msix_present), + VMSTATE_VFIO_INTX(intx, VFIOPCIDevice), VMSTATE_END_OF_LIST() } }; + +static NotifierWithReturn kvm_close_notifier; + +static int vfio_cpr_kvm_close_notifier(NotifierWithReturn *notifier, + MigrationEvent *e, + Error **errp) +{ + if (e->type == MIG_EVENT_PRECOPY_DONE) { + vfio_kvm_device_close(); + } + return 0; +} + +void vfio_cpr_add_kvm_notifier(void) +{ + if (!kvm_close_notifier.notify) { + migration_add_notifier_mode(&kvm_close_notifier, + vfio_cpr_kvm_close_notifier, + MIG_MODE_CPR_TRANSFER); + } +} diff --git a/hw/vfio/device.c b/hw/vfio/device.c index d91c695..96cf214 100644 --- a/hw/vfio/device.c +++ b/hw/vfio/device.c @@ -28,6 +28,8 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "qemu/units.h" +#include "migration/cpr.h" +#include "migration/blocker.h" #include "monitor/monitor.h" #include "vfio-helpers.h" @@ -316,28 +318,40 @@ bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp) error_setg(errp, "Use FD passing only with iommufd backend"); return false; } - /* - * Give a name with fd so any function printing out vbasedev->name - * will not break. - */ if (!vbasedev->name) { - vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd); + + if (vbasedev->dev->id) { + vbasedev->name = g_strdup(vbasedev->dev->id); + return true; + } else { + /* + * Assign a name so any function printing it will not break. + * The fd number changes across processes, so this cannot be + * used as an invariant name for CPR. + */ + vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd); + error_setg(&vbasedev->cpr.id_blocker, + "vfio device with fd=%d needs an id property", + vbasedev->fd); + return migrate_add_blocker_modes(&vbasedev->cpr.id_blocker, + errp, MIG_MODE_CPR_TRANSFER, + -1) == 0; + } } } return true; } -void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) +void vfio_device_free_name(VFIODevice *vbasedev) { - ERRP_GUARD(); - int fd = monitor_fd_param(monitor_cur(), str, errp); + g_clear_pointer(&vbasedev->name, g_free); + migrate_del_blocker(&vbasedev->cpr.id_blocker); +} - if (fd < 0) { - error_prepend(errp, "Could not parse remote object fd %s:", str); - return; - } - vbasedev->fd = fd; +void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) +{ + vbasedev->fd = cpr_get_fd_param(vbasedev->dev->id, str, 0, errp); } static VFIODeviceIOOps vfio_device_io_ops_ioctl; diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c index d0dbab1..9a5f621 100644 --- a/hw/vfio/helpers.c +++ b/hw/vfio/helpers.c @@ -117,6 +117,17 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, int vfio_kvm_device_fd = -1; #endif +void vfio_kvm_device_close(void) +{ +#ifdef CONFIG_KVM + kvm_close(); + if (vfio_kvm_device_fd != -1) { + close(vfio_kvm_device_fd); + vfio_kvm_device_fd = -1; + } +#endif +} + int vfio_kvm_device_add_fd(int fd, Error **errp) { #ifdef CONFIG_KVM diff --git a/hw/vfio/iommufd-stubs.c b/hw/vfio/iommufd-stubs.c new file mode 100644 index 0000000..0be5276 --- /dev/null +++ b/hw/vfio/iommufd-stubs.c @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2025 Oracle and/or its affiliates. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "migration/cpr.h" +#include "migration/vmstate.h" + +const VMStateDescription vmstate_cpr_vfio_devices = { + .name = CPR_STATE "/vfio devices", + .version_id = 1, + .minimum_version_id = 1, + .fields = (const VMStateField[]){ + VMSTATE_END_OF_LIST() + } +}; diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index d3efef7..48c590b 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -25,6 +25,7 @@ #include "system/reset.h" #include "qemu/cutils.h" #include "qemu/chardev_open.h" +#include "migration/cpr.h" #include "pci.h" #include "vfio-iommufd.h" #include "vfio-helpers.h" @@ -45,6 +46,18 @@ static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova, iova, size, vaddr, readonly); } +static int iommufd_cdev_map_file(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + int fd, unsigned long start, bool readonly) +{ + const VFIOIOMMUFDContainer *container = + container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); + + return iommufd_backend_map_file_dma(container->be, + container->ioas_id, + iova, size, fd, start, readonly); +} + static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, IOMMUTLBEntry *iotlb, bool unmap_all) @@ -109,6 +122,10 @@ static bool iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp) goto err_kvm_device_add; } + if (cpr_is_incoming()) { + goto skip_bind; + } + /* Bind device to iommufd */ bind.iommufd = iommufd->fd; if (ioctl(vbasedev->fd, VFIO_DEVICE_BIND_IOMMUFD, &bind)) { @@ -120,6 +137,8 @@ static bool iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp) vbasedev->devid = bind.out_devid; trace_iommufd_cdev_connect_and_bind(bind.iommufd, vbasedev->name, vbasedev->fd, vbasedev->devid); + +skip_bind: return true; err_bind: iommufd_cdev_kvm_device_del(vbasedev); @@ -313,7 +332,14 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev, /* Try to find a domain */ QLIST_FOREACH(hwpt, &container->hwpt_list, next) { - ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp); + if (!cpr_is_incoming()) { + ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp); + } else if (vbasedev->cpr.hwpt_id == hwpt->hwpt_id) { + ret = 0; + } else { + continue; + } + if (ret) { /* -EINVAL means the domain is incompatible with the device. */ if (ret == -EINVAL) { @@ -330,6 +356,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev, return false; } else { vbasedev->hwpt = hwpt; + vbasedev->cpr.hwpt_id = hwpt->hwpt_id; QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next); vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt); return true; @@ -352,6 +379,11 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev, flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING; } + if (cpr_is_incoming()) { + hwpt_id = vbasedev->cpr.hwpt_id; + goto skip_alloc; + } + if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid, container->ioas_id, flags, IOMMU_HWPT_DATA_NONE, 0, NULL, @@ -359,19 +391,20 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev, return false; } + ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt_id, errp); + if (ret) { + iommufd_backend_free_id(container->be, hwpt_id); + return false; + } + +skip_alloc: hwpt = g_malloc0(sizeof(*hwpt)); hwpt->hwpt_id = hwpt_id; hwpt->hwpt_flags = flags; QLIST_INIT(&hwpt->device_list); - ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp); - if (ret) { - iommufd_backend_free_id(container->be, hwpt->hwpt_id); - g_free(hwpt); - return false; - } - vbasedev->hwpt = hwpt; + vbasedev->cpr.hwpt_id = hwpt->hwpt_id; vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt); QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next); QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next); @@ -409,7 +442,9 @@ static bool iommufd_cdev_attach_container(VFIODevice *vbasedev, return iommufd_cdev_autodomains_get(vbasedev, container, errp); } - return !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp); + /* If CPR, we are already attached to ioas_id. */ + return cpr_is_incoming() || + !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp); } static void iommufd_cdev_detach_container(VFIODevice *vbasedev, @@ -434,7 +469,7 @@ static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container) if (!QLIST_EMPTY(&bcontainer->device_list)) { return; } - vfio_cpr_unregister_container(bcontainer); + vfio_iommufd_cpr_unregister_container(container); vfio_listener_unregister(bcontainer); iommufd_backend_free_id(container->be, container->ioas_id); object_unref(container); @@ -498,11 +533,14 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, VFIOAddressSpace *space; struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) }; int ret, devfd; + bool res; uint32_t ioas_id; Error *err = NULL; const VFIOIOMMUClass *iommufd_vioc = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); + vfio_cpr_load_device(vbasedev); + if (vbasedev->fd < 0) { devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); if (devfd < 0) { @@ -526,7 +564,16 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, vbasedev->iommufd != container->be) { continue; } - if (!iommufd_cdev_attach_container(vbasedev, container, &err)) { + + if (!cpr_is_incoming()) { + res = iommufd_cdev_attach_container(vbasedev, container, &err); + } else if (vbasedev->cpr.ioas_id == container->ioas_id) { + res = true; + } else { + continue; + } + + if (!res) { const char *msg = error_get_pretty(err); trace_iommufd_cdev_fail_attach_existing_container(msg); @@ -543,6 +590,11 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, } } + if (cpr_is_incoming()) { + ioas_id = vbasedev->cpr.ioas_id; + goto skip_ioas_alloc; + } + /* Need to allocate a new dedicated container */ if (!iommufd_backend_alloc_ioas(vbasedev->iommufd, &ioas_id, errp)) { goto err_alloc_ioas; @@ -550,10 +602,12 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, trace_iommufd_cdev_alloc_ioas(vbasedev->iommufd->fd, ioas_id); +skip_ioas_alloc: container = VFIO_IOMMU_IOMMUFD(object_new(TYPE_VFIO_IOMMU_IOMMUFD)); container->be = vbasedev->iommufd; container->ioas_id = ioas_id; QLIST_INIT(&container->hwpt_list); + vbasedev->cpr.ioas_id = ioas_id; bcontainer = &container->bcontainer; vfio_address_space_insert(space, bcontainer); @@ -580,7 +634,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, goto err_listener_register; } - if (!vfio_cpr_register_container(bcontainer, errp)) { + if (!vfio_iommufd_cpr_register_container(container, errp)) { goto err_listener_register; } @@ -611,6 +665,7 @@ found_container: } vfio_device_prepare(vbasedev, bcontainer, &dev_info); + vfio_iommufd_cpr_register_device(vbasedev); trace_iommufd_cdev_device_info(vbasedev->name, devfd, vbasedev->num_irqs, vbasedev->num_regions, vbasedev->flags); @@ -648,6 +703,7 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev) iommufd_cdev_container_destroy(container); vfio_address_space_put(space); + vfio_iommufd_cpr_unregister_device(vbasedev); iommufd_cdev_unbind_and_disconnect(vbasedev); close(vbasedev->fd); } @@ -807,6 +863,7 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, const void *data) VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); vioc->dma_map = iommufd_cdev_map; + vioc->dma_map_file = iommufd_cdev_map_file; vioc->dma_unmap = iommufd_cdev_unmap; vioc->attach_device = iommufd_cdev_attach; vioc->detach_device = iommufd_cdev_detach; diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build index 63ea393..bfaf6be 100644 --- a/hw/vfio/meson.build +++ b/hw/vfio/meson.build @@ -31,7 +31,9 @@ system_ss.add(when: 'CONFIG_VFIO', if_true: files( )) system_ss.add(when: ['CONFIG_VFIO', 'CONFIG_IOMMUFD'], if_true: files( 'iommufd.c', + 'cpr-iommufd.c', )) +system_ss.add(when: 'CONFIG_IOMMUFD', if_false: files('iommufd-stubs.c')) system_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files( 'display.c', )) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index fa25bde..1093b28 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -29,6 +29,7 @@ #include "hw/pci/pci_bridge.h" #include "hw/qdev-properties.h" #include "hw/qdev-properties-system.h" +#include "hw/vfio/vfio-cpr.h" #include "migration/vmstate.h" #include "migration/cpr.h" #include "qobject/qdict.h" @@ -57,20 +58,33 @@ static void vfio_disable_interrupts(VFIOPCIDevice *vdev); static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); static void vfio_msi_disable_common(VFIOPCIDevice *vdev); +/* Create new or reuse existing eventfd */ static bool vfio_notifier_init(VFIOPCIDevice *vdev, EventNotifier *e, const char *name, int nr, Error **errp) { - int ret = event_notifier_init(e, 0); + int fd, ret; + fd = vfio_cpr_load_vector_fd(vdev, name, nr); + if (fd >= 0) { + event_notifier_init_fd(e, fd); + return true; + } + + ret = event_notifier_init(e, 0); if (ret) { error_setg_errno(errp, -ret, "vfio_notifier_init %s failed", name); + return false; } - return !ret; + + fd = event_notifier_get_fd(e); + vfio_cpr_save_vector_fd(vdev, name, nr, fd); + return true; } static void vfio_notifier_cleanup(VFIOPCIDevice *vdev, EventNotifier *e, const char *name, int nr) { + vfio_cpr_delete_vector_fd(vdev, name, nr); event_notifier_cleanup(e); } @@ -196,6 +210,36 @@ fail: #endif } +static bool vfio_cpr_intx_enable_kvm(VFIOPCIDevice *vdev, Error **errp) +{ +#ifdef CONFIG_KVM + if (vdev->no_kvm_intx || !kvm_irqfds_enabled() || + vdev->intx.route.mode != PCI_INTX_ENABLED || + !kvm_resamplefds_enabled()) { + return true; + } + + if (!vfio_notifier_init(vdev, &vdev->intx.unmask, "intx-unmask", 0, errp)) { + return false; + } + + if (kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, + &vdev->intx.interrupt, + &vdev->intx.unmask, + vdev->intx.route.irq)) { + error_setg_errno(errp, errno, "failed to setup resample irqfd"); + vfio_notifier_cleanup(vdev, &vdev->intx.unmask, "intx-unmask", 0); + return false; + } + + vdev->intx.kvm_accel = true; + trace_vfio_intx_enable_kvm(vdev->vbasedev.name); + return true; +#else + return true; +#endif +} + static void vfio_intx_disable_kvm(VFIOPCIDevice *vdev) { #ifdef CONFIG_KVM @@ -291,7 +335,13 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp) return true; } - vfio_disable_interrupts(vdev); + /* + * Do not alter interrupt state during vfio_realize and cpr load. + * The incoming state is cleared thereafter. + */ + if (!cpr_is_incoming()) { + vfio_disable_interrupts(vdev); + } vdev->intx.pin = pin - 1; /* Pin A (1) -> irq[0] */ pci_config_set_interrupt_pin(vdev->pdev.config, pin); @@ -314,6 +364,14 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp) fd = event_notifier_get_fd(&vdev->intx.interrupt); qemu_set_fd_handler(fd, vfio_intx_interrupt, NULL, vdev); + + if (cpr_is_incoming()) { + if (!vfio_cpr_intx_enable_kvm(vdev, &err)) { + warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); + } + goto skip_signaling; + } + if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX, 0, VFIO_IRQ_SET_ACTION_TRIGGER, fd, errp)) { qemu_set_fd_handler(fd, NULL, NULL, vdev); @@ -325,6 +383,7 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp) warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); } +skip_signaling: vdev->interrupt = VFIO_INT_INTx; trace_vfio_intx_enable(vdev->vbasedev.name); @@ -394,6 +453,14 @@ static void vfio_msi_interrupt(void *opaque) notify(&vdev->pdev, nr); } +void vfio_pci_msi_set_handler(VFIOPCIDevice *vdev, int nr) +{ + VFIOMSIVector *vector = &vdev->msi_vectors[nr]; + int fd = event_notifier_get_fd(&vector->interrupt); + + qemu_set_fd_handler(fd, vfio_msi_interrupt, NULL, vector); +} + /* * Get MSI-X enabled, but no vector enabled, by setting vector 0 with an invalid * fd to kernel. @@ -656,6 +723,15 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, static int vfio_msix_vector_use(PCIDevice *pdev, unsigned int nr, MSIMessage msg) { + /* + * Ignore the callback from msix_set_vector_notifiers during resume. + * The necessary subset of these actions is called from + * vfio_cpr_claim_vectors during post load. + */ + if (cpr_is_incoming()) { + return 0; + } + return vfio_msix_vector_do_use(pdev, nr, &msg, vfio_msi_interrupt); } @@ -686,6 +762,12 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr) } } +void vfio_pci_msix_set_notifiers(VFIOPCIDevice *vdev) +{ + msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use, + vfio_msix_vector_release, NULL); +} + void vfio_pci_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev) { assert(!vdev->defer_kvm_irq_routing); @@ -2914,7 +2996,7 @@ void vfio_pci_put_device(VFIOPCIDevice *vdev) vfio_device_detach(&vdev->vbasedev); - g_free(vdev->vbasedev.name); + vfio_device_free_name(&vdev->vbasedev); g_free(vdev->msix); } @@ -2965,6 +3047,11 @@ void vfio_pci_register_err_notifier(VFIOPCIDevice *vdev) fd = event_notifier_get_fd(&vdev->err_notifier); qemu_set_fd_handler(fd, vfio_err_notifier_handler, NULL, vdev); + /* Do not alter irq_signaling during vfio_realize for cpr */ + if (cpr_is_incoming()) { + return; + } + if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_ERR_IRQ_INDEX, 0, VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) { error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); @@ -3032,6 +3119,12 @@ void vfio_pci_register_req_notifier(VFIOPCIDevice *vdev) fd = event_notifier_get_fd(&vdev->req_notifier); qemu_set_fd_handler(fd, vfio_req_notifier_handler, NULL, vdev); + /* Do not alter irq_signaling during vfio_realize for cpr */ + if (cpr_is_incoming()) { + vdev->req_enabled = true; + return; + } + if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_REQ_IRQ_INDEX, 0, VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) { error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); @@ -3189,7 +3282,13 @@ bool vfio_pci_interrupt_setup(VFIOPCIDevice *vdev, Error **errp) vfio_intx_routing_notifier); vdev->irqchip_change_notifier.notify = vfio_irqchip_change; kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier); - if (!vfio_intx_enable(vdev, errp)) { + + /* + * During CPR, do not call vfio_intx_enable at this time. Instead, + * call it from vfio_pci_post_load after the intx routing data has + * been loaded from vmstate. + */ + if (!cpr_is_incoming() && !vfio_intx_enable(vdev, errp)) { timer_free(vdev->intx.mmap_timer); pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index 5ba7330..495fae7 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -218,6 +218,8 @@ void vfio_pci_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, void vfio_pci_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev); void vfio_pci_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev); bool vfio_pci_intx_enable(VFIOPCIDevice *vdev, Error **errp); +void vfio_pci_msix_set_notifiers(VFIOPCIDevice *vdev); +void vfio_pci_msi_set_handler(VFIOPCIDevice *vdev, int nr); uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len); void vfio_pci_write_config(PCIDevice *pdev, diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c index 9a21f2e..5c1795a 100644 --- a/hw/vfio/platform.c +++ b/hw/vfio/platform.c @@ -530,7 +530,7 @@ static bool vfio_base_device_init(VFIODevice *vbasedev, Error **errp) { /* @fd takes precedence over @sysfsdev which takes precedence over @host */ if (vbasedev->fd < 0 && vbasedev->sysfsdev) { - g_free(vbasedev->name); + vfio_device_free_name(vbasedev); vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); } else if (vbasedev->fd < 0) { if (!vbasedev->name || strchr(vbasedev->name, '/')) { diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index e1728c4..8ec0ad0 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -197,6 +197,9 @@ iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new IOMMUFD con iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d" iommufd_cdev_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int dev_id) "\t%04x:%02x:%02x.%x devid %d" +# cpr-iommufd.c +vfio_cpr_find_device(uint32_t ioas_id, int devid, uint32_t hwpt_id) "ioas_id %u, devid %d, hwpt_id %u" + # device.c vfio_device_get_region_info_type(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%08x" vfio_device_reset_handler(void) "" diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index a684855..9b658a3 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -85,6 +85,7 @@ void qemu_ram_unset_idstr(RAMBlock *block); const char *qemu_ram_get_idstr(RAMBlock *rb); void *qemu_ram_get_host_addr(RAMBlock *rb); ram_addr_t qemu_ram_get_offset(RAMBlock *rb); +ram_addr_t qemu_ram_get_fd_offset(RAMBlock *rb); ram_addr_t qemu_ram_get_used_length(RAMBlock *rb); ram_addr_t qemu_ram_get_max_length(RAMBlock *rb); bool qemu_ram_is_shared(RAMBlock *rb); diff --git a/include/hw/arm/aspeed.h b/include/hw/arm/aspeed.h index 973277b..6c36455 100644 --- a/include/hw/arm/aspeed.h +++ b/include/hw/arm/aspeed.h @@ -35,7 +35,9 @@ struct AspeedMachineClass { uint32_t hw_strap2; const char *fmc_model; const char *spi_model; + const char *spi2_model; uint32_t num_cs; + uint32_t num_cs2; uint32_t macs_mask; void (*i2c_init)(AspeedMachineState *bmc); uint32_t uart_default; diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index 162a56a..5eaf41a 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -442,6 +442,7 @@ struct qemu_work_item; * @opaque: User data. * @mem_io_pc: Host Program Counter at which the memory was accessed. * @accel: Pointer to accelerator specific state. + * @vcpu_dirty: Hardware accelerator is not synchronized with QEMU state * @kvm_fd: vCPU file descriptor for KVM. * @work_mutex: Lock to prevent multiple access to @work_list. * @work_list: List of pending asynchronous work. @@ -538,7 +539,6 @@ struct CPUState { uint32_t kvm_fetch_index; uint64_t dirty_pages; int kvm_vcpu_stats_fd; - bool vcpu_dirty; /* Use by accel-block: CPU is executing an ioctl() */ QemuLockCnt in_ioctl_lock; @@ -554,6 +554,7 @@ struct CPUState { uint32_t halted; int32_t exception_index; + bool vcpu_dirty; AccelCPUState *accel; /* Used to keep track of an outstanding cpu throttle thread for migration diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h index 3cd86ec..bded6e9 100644 --- a/include/hw/vfio/vfio-container-base.h +++ b/include/hw/vfio/vfio-container-base.h @@ -168,6 +168,21 @@ struct VFIOIOMMUClass { hwaddr iova, ram_addr_t size, void *vaddr, bool readonly, MemoryRegion *mr); /** + * @dma_map_file + * + * Map a file range for the container. + * + * @bcontainer: #VFIOContainerBase to use for map + * @iova: start address to map + * @size: size of the range to map + * @fd: descriptor of the file to map + * @start: starting file offset of the range to map + * @readonly: map read only if true + */ + int (*dma_map_file)(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + int fd, unsigned long start, bool readonly); + /** * @dma_unmap * * Unmap an address range from the container. diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h index 8bf85b9..80ad20d 100644 --- a/include/hw/vfio/vfio-cpr.h +++ b/include/hw/vfio/vfio-cpr.h @@ -15,19 +15,27 @@ struct VFIOContainer; struct VFIOContainerBase; struct VFIOGroup; +struct VFIODevice; +struct VFIOPCIDevice; +struct VFIOIOMMUFDContainer; +struct IOMMUFDBackend; + +typedef int (*dma_map_fn)(const struct VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, void *vaddr, + bool readonly, MemoryRegion *mr); typedef struct VFIOContainerCPR { Error *blocker; bool vaddr_unmapped; NotifierWithReturn transfer_notifier; MemoryListener remap_listener; - int (*saved_dma_map)(const struct VFIOContainerBase *bcontainer, - hwaddr iova, ram_addr_t size, - void *vaddr, bool readonly, MemoryRegion *mr); } VFIOContainerCPR; typedef struct VFIODeviceCPR { Error *mdev_blocker; + Error *id_blocker; + uint32_t hwpt_id; + uint32_t ioas_id; } VFIODeviceCPR; bool vfio_legacy_cpr_register_container(struct VFIOContainer *container, @@ -37,9 +45,15 @@ void vfio_legacy_cpr_unregister_container(struct VFIOContainer *container); int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier, MigrationEvent *e, Error **errp); -bool vfio_cpr_register_container(struct VFIOContainerBase *bcontainer, - Error **errp); -void vfio_cpr_unregister_container(struct VFIOContainerBase *bcontainer); +bool vfio_iommufd_cpr_register_container(struct VFIOIOMMUFDContainer *container, + Error **errp); +void vfio_iommufd_cpr_unregister_container( + struct VFIOIOMMUFDContainer *container); +bool vfio_iommufd_cpr_register_iommufd(struct IOMMUFDBackend *be, Error **errp); +void vfio_iommufd_cpr_unregister_iommufd(struct IOMMUFDBackend *be); +void vfio_iommufd_cpr_register_device(struct VFIODevice *vbasedev); +void vfio_iommufd_cpr_unregister_device(struct VFIODevice *vbasedev); +void vfio_cpr_load_device(struct VFIODevice *vbasedev); int vfio_cpr_group_get_device_fd(int d, const char *name); @@ -52,6 +66,16 @@ void vfio_cpr_giommu_remap(struct VFIOContainerBase *bcontainer, bool vfio_cpr_ram_discard_register_listener( struct VFIOContainerBase *bcontainer, MemoryRegionSection *section); +void vfio_cpr_save_vector_fd(struct VFIOPCIDevice *vdev, const char *name, + int nr, int fd); +int vfio_cpr_load_vector_fd(struct VFIOPCIDevice *vdev, const char *name, + int nr); +void vfio_cpr_delete_vector_fd(struct VFIOPCIDevice *vdev, const char *name, + int nr); + extern const VMStateDescription vfio_cpr_pci_vmstate; +extern const VMStateDescription vmstate_cpr_vfio_devices; + +void vfio_cpr_add_kvm_notifier(void); #endif /* HW_VFIO_VFIO_CPR_H */ diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h index c616652..1901a35 100644 --- a/include/hw/vfio/vfio-device.h +++ b/include/hw/vfio/vfio-device.h @@ -279,8 +279,11 @@ int vfio_device_get_irq_info(VFIODevice *vbasedev, int index, /* Returns 0 on success, or a negative errno. */ bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp); +void vfio_device_free_name(VFIODevice *vbasedev); void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp); void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops, DeviceState *dev, bool ram_discard); int vfio_device_get_aw_bits(VFIODevice *vdev); + +void vfio_kvm_device_close(void); #endif /* HW_VFIO_VFIO_COMMON_H */ diff --git a/include/migration/cpr.h b/include/migration/cpr.h index 07858e9..3fc19a7 100644 --- a/include/migration/cpr.h +++ b/include/migration/cpr.h @@ -9,11 +9,23 @@ #define MIGRATION_CPR_H #include "qapi/qapi-types-migration.h" +#include "qemu/queue.h" #define MIG_MODE_NONE -1 #define QEMU_CPR_FILE_MAGIC 0x51435052 #define QEMU_CPR_FILE_VERSION 0x00000001 +#define CPR_STATE "CprState" + +typedef QLIST_HEAD(CprFdList, CprFd) CprFdList; +typedef QLIST_HEAD(CprVFIODeviceList, CprVFIODevice) CprVFIODeviceList; + +typedef struct CprState { + CprFdList fds; + CprVFIODeviceList vfio_devices; +} CprState; + +extern CprState cpr_state; void cpr_save_fd(const char *name, int id, int fd); void cpr_delete_fd(const char *name, int id); @@ -32,6 +44,8 @@ void cpr_state_close(void); struct QIOChannel *cpr_state_ioc(void); bool cpr_incoming_needed(void *opaque); +int cpr_get_fd_param(const char *name, const char *fdname, int index, + Error **errp); QEMUFile *cpr_transfer_output(MigrationChannel *channel, Error **errp); QEMUFile *cpr_transfer_input(MigrationChannel *channel, Error **errp); diff --git a/include/qemu/accel.h b/include/qemu/accel.h index fbd3d89..1c097ac 100644 --- a/include/qemu/accel.h +++ b/include/qemu/accel.h @@ -37,17 +37,20 @@ typedef struct AccelClass { /*< public >*/ const char *name; - int (*init_machine)(MachineState *ms); + /* Cached by accel_init_ops_interfaces() when created */ + AccelOpsClass *ops; + + int (*init_machine)(AccelState *as, MachineState *ms); bool (*cpu_common_realize)(CPUState *cpu, Error **errp); void (*cpu_common_unrealize)(CPUState *cpu); /* system related hooks */ - void (*setup_post)(MachineState *ms, AccelState *accel); - bool (*has_memory)(MachineState *ms, AddressSpace *as, + void (*setup_post)(AccelState *as); + bool (*has_memory)(AccelState *accel, AddressSpace *as, hwaddr start_addr, hwaddr size); /* gdbstub related hooks */ - int (*gdbstub_supported_sstep_flags)(void); + int (*gdbstub_supported_sstep_flags)(AccelState *as); bool *allowed; /* diff --git a/include/system/accel-ops.h b/include/system/accel-ops.h index 4c99d25..a786c7d 100644 --- a/include/system/accel-ops.h +++ b/include/system/accel-ops.h @@ -10,6 +10,7 @@ #ifndef ACCEL_OPS_H #define ACCEL_OPS_H +#include "qemu/accel.h" #include "exec/vaddr.h" #include "qom/object.h" @@ -31,7 +32,7 @@ struct AccelOpsClass { /*< public >*/ /* initialization function called when accel is chosen */ - void (*ops_init)(AccelOpsClass *ops); + void (*ops_init)(AccelClass *ac); bool (*cpus_are_resettable)(void); void (*cpu_reset_hold)(CPUState *cpu); @@ -40,12 +41,29 @@ struct AccelOpsClass { void (*kick_vcpu_thread)(CPUState *cpu); bool (*cpu_thread_is_idle)(CPUState *cpu); + /** + * synchronize_post_reset: + * synchronize_post_init: + * @cpu: The vCPU to synchronize. + * + * Request to synchronize QEMU vCPU registers to the hardware accelerator + * (QEMU is the reference). + */ void (*synchronize_post_reset)(CPUState *cpu); void (*synchronize_post_init)(CPUState *cpu); + /** + * synchronize_state: + * synchronize_pre_loadvm: + * @cpu: The vCPU to synchronize. + * + * Request to synchronize QEMU vCPU registers from the hardware accelerator + * (the hardware accelerator is the reference). + */ void (*synchronize_state)(CPUState *cpu); void (*synchronize_pre_loadvm)(CPUState *cpu); void (*synchronize_pre_resume)(bool step_pending); + /* handle_interrupt is mandatory. */ void (*handle_interrupt)(CPUState *cpu, int mask); /** @@ -70,4 +88,6 @@ struct AccelOpsClass { void (*remove_all_breakpoints)(CPUState *cpu); }; +void generic_handle_interrupt(CPUState *cpu, int mask); + #endif /* ACCEL_OPS_H */ diff --git a/include/system/cpus.h b/include/system/cpus.h index 3226c76..69be6a7 100644 --- a/include/system/cpus.h +++ b/include/system/cpus.h @@ -7,11 +7,6 @@ void cpus_register_accel(const AccelOpsClass *i); /* return registers ops */ const AccelOpsClass *cpus_get_accel(void); -/* accel/dummy-cpus.c */ - -/* Create a dummy vcpu for AccelOpsClass->create_vcpu_thread */ -void dummy_start_vcpu_thread(CPUState *); - /* interface available for cpus accelerator threads */ /* For temporary buffers for forming a name */ diff --git a/include/system/hvf.h b/include/system/hvf.h index a9a502f..d3dcf08 100644 --- a/include/system/hvf.h +++ b/include/system/hvf.h @@ -14,10 +14,6 @@ #define HVF_H #include "qemu/accel.h" -#include "qemu/queue.h" -#include "exec/vaddr.h" -#include "qom/object.h" -#include "exec/vaddr.h" #ifdef COMPILING_PER_TARGET # ifdef CONFIG_HVF @@ -40,38 +36,4 @@ typedef struct HVFState HVFState; DECLARE_INSTANCE_CHECKER(HVFState, HVF_STATE, TYPE_HVF_ACCEL) -#ifdef COMPILING_PER_TARGET -struct hvf_sw_breakpoint { - vaddr pc; - vaddr saved_insn; - int use_count; - QTAILQ_ENTRY(hvf_sw_breakpoint) entry; -}; - -struct hvf_sw_breakpoint *hvf_find_sw_breakpoint(CPUState *cpu, - vaddr pc); -int hvf_sw_breakpoints_active(CPUState *cpu); - -int hvf_arch_insert_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp); -int hvf_arch_remove_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp); -int hvf_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type); -int hvf_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type); -void hvf_arch_remove_all_hw_breakpoints(void); - -/* - * hvf_update_guest_debug: - * @cs: CPUState for the CPU to update - * - * Update guest to enable or disable debugging. Per-arch specifics will be - * handled by calling down to hvf_arch_update_guest_debug. - */ -int hvf_update_guest_debug(CPUState *cpu); -void hvf_arch_update_guest_debug(CPUState *cpu); - -/* - * Return whether the guest supports debugging. - */ -bool hvf_arch_supports_guest_debug(void); -#endif /* COMPILING_PER_TARGET */ - #endif diff --git a/include/system/hvf_int.h b/include/system/hvf_int.h index d774e58..5150c7d 100644 --- a/include/system/hvf_int.h +++ b/include/system/hvf_int.h @@ -12,6 +12,8 @@ #define HVF_INT_H #include "qemu/queue.h" +#include "exec/vaddr.h" +#include "qom/object.h" #ifdef __aarch64__ #include <Hypervisor/Hypervisor.h> @@ -60,7 +62,6 @@ struct AccelCPUState { bool vtimer_masked; sigset_t unblock_ipi_mask; bool guest_debug_enabled; - bool dirty; }; void assert_hvf_ok_impl(hv_return_t ret, const char *file, unsigned int line, @@ -77,4 +78,36 @@ int hvf_put_registers(CPUState *); int hvf_get_registers(CPUState *); void hvf_kick_vcpu_thread(CPUState *cpu); +struct hvf_sw_breakpoint { + vaddr pc; + vaddr saved_insn; + int use_count; + QTAILQ_ENTRY(hvf_sw_breakpoint) entry; +}; + +struct hvf_sw_breakpoint *hvf_find_sw_breakpoint(CPUState *cpu, + vaddr pc); +int hvf_sw_breakpoints_active(CPUState *cpu); + +int hvf_arch_insert_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp); +int hvf_arch_remove_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp); +int hvf_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type); +int hvf_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type); +void hvf_arch_remove_all_hw_breakpoints(void); + +/* + * hvf_update_guest_debug: + * @cs: CPUState for the CPU to update + * + * Update guest to enable or disable debugging. Per-arch specifics will be + * handled by calling down to hvf_arch_update_guest_debug. + */ +int hvf_update_guest_debug(CPUState *cpu); +void hvf_arch_update_guest_debug(CPUState *cpu); + +/* + * Return whether the guest supports debugging. + */ +bool hvf_arch_supports_guest_debug(void); + #endif diff --git a/include/system/hw_accel.h b/include/system/hw_accel.h index 380e9e6..fa9228d 100644 --- a/include/system/hw_accel.h +++ b/include/system/hw_accel.h @@ -17,9 +17,26 @@ #include "system/whpx.h" #include "system/nvmm.h" +/** + * cpu_synchronize_state: + * cpu_synchronize_pre_loadvm: + * @cpu: The vCPU to synchronize. + * + * Request to synchronize QEMU vCPU registers from the hardware accelerator + * (the hardware accelerator is the reference). + */ void cpu_synchronize_state(CPUState *cpu); +void cpu_synchronize_pre_loadvm(CPUState *cpu); + +/** + * cpu_synchronize_post_reset: + * cpu_synchronize_post_init: + * @cpu: The vCPU to synchronize. + * + * Request to synchronize QEMU vCPU registers to the hardware accelerator + * (QEMU is the reference). + */ void cpu_synchronize_post_reset(CPUState *cpu); void cpu_synchronize_post_init(CPUState *cpu); -void cpu_synchronize_pre_loadvm(CPUState *cpu); #endif /* QEMU_HW_ACCEL_H */ diff --git a/include/system/iommufd.h b/include/system/iommufd.h index 283861b..c9c72ff 100644 --- a/include/system/iommufd.h +++ b/include/system/iommufd.h @@ -32,6 +32,7 @@ struct IOMMUFDBackend { /*< protected >*/ int fd; /* /dev/iommu file descriptor */ bool owned; /* is the /dev/iommu opened internally */ + Error *cpr_blocker;/* set if be does not support CPR */ uint32_t users; /*< public >*/ @@ -43,6 +44,9 @@ void iommufd_backend_disconnect(IOMMUFDBackend *be); bool iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, Error **errp); void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id); +int iommufd_backend_map_file_dma(IOMMUFDBackend *be, uint32_t ioas_id, + hwaddr iova, ram_addr_t size, int fd, + unsigned long start, bool readonly); int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, ram_addr_t size, void *vaddr, bool readonly); int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, @@ -66,6 +70,9 @@ bool iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t id, uint32_t *entry_num, void *data, Error **errp); +bool iommufd_change_process_capable(IOMMUFDBackend *be); +bool iommufd_change_process(IOMMUFDBackend *be, Error **errp); + #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd" OBJECT_DECLARE_TYPE(HostIOMMUDeviceIOMMUFD, HostIOMMUDeviceIOMMUFDClass, HOST_IOMMU_DEVICE_IOMMUFD) diff --git a/include/system/kvm.h b/include/system/kvm.h index 7cc60d2..3c7d314 100644 --- a/include/system/kvm.h +++ b/include/system/kvm.h @@ -195,6 +195,7 @@ bool kvm_has_sync_mmu(void); int kvm_has_vcpu_events(void); int kvm_max_nested_state_length(void); int kvm_has_gsi_routing(void); +void kvm_close(void); /** * kvm_arm_supports_user_irq @@ -317,14 +318,6 @@ int kvm_create_device(KVMState *s, uint64_t type, bool test); bool kvm_device_supported(int vmfd, uint64_t type); /** - * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU - * @cpu: QOM CPUState object for which KVM vCPU has to be fetched/created. - * - * @returns: 0 when success, errno (<0) when failed. - */ -int kvm_create_vcpu(CPUState *cpu); - -/** * kvm_park_vcpu - Park QEMU KVM vCPU context * @cpu: QOM CPUState object for which QEMU KVM vCPU context has to be parked. * diff --git a/include/system/nvmm.h b/include/system/nvmm.h index 6971ddb..7390def 100644 --- a/include/system/nvmm.h +++ b/include/system/nvmm.h @@ -13,17 +13,18 @@ #define QEMU_NVMM_H #ifdef COMPILING_PER_TARGET - -#ifdef CONFIG_NVMM - -int nvmm_enabled(void); - -#else /* CONFIG_NVMM */ - -#define nvmm_enabled() (0) - -#endif /* CONFIG_NVMM */ - +# ifdef CONFIG_NVMM +# define CONFIG_NVMM_IS_POSSIBLE +# endif /* !CONFIG_NVMM */ +#else +# define CONFIG_NVMM_IS_POSSIBLE #endif /* COMPILING_PER_TARGET */ +#ifdef CONFIG_NVMM_IS_POSSIBLE +extern bool nvmm_allowed; +#define nvmm_enabled() (nvmm_allowed) +#else /* !CONFIG_NVMM_IS_POSSIBLE */ +#define nvmm_enabled() 0 +#endif /* !CONFIG_NVMM_IS_POSSIBLE */ + #endif /* QEMU_NVMM_H */ diff --git a/include/system/whpx.h b/include/system/whpx.h index 00ff409..00f6a3e 100644 --- a/include/system/whpx.h +++ b/include/system/whpx.h @@ -16,19 +16,20 @@ #define QEMU_WHPX_H #ifdef COMPILING_PER_TARGET +# ifdef CONFIG_WHPX +# define CONFIG_WHPX_IS_POSSIBLE +# endif /* !CONFIG_WHPX */ +#else +# define CONFIG_WHPX_IS_POSSIBLE +#endif /* COMPILING_PER_TARGET */ -#ifdef CONFIG_WHPX - -int whpx_enabled(void); +#ifdef CONFIG_WHPX_IS_POSSIBLE +extern bool whpx_allowed; +#define whpx_enabled() (whpx_allowed) bool whpx_apic_in_platform(void); - -#else /* CONFIG_WHPX */ - -#define whpx_enabled() (0) +#else /* !CONFIG_WHPX_IS_POSSIBLE */ +#define whpx_enabled() 0 #define whpx_apic_in_platform() (0) - -#endif /* CONFIG_WHPX */ - -#endif /* COMPILING_PER_TARGET */ +#endif /* !CONFIG_WHPX_IS_POSSIBLE */ #endif /* QEMU_WHPX_H */ diff --git a/linux-user/main.c b/linux-user/main.c index 5ac5b55..a9142ee 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -820,7 +820,7 @@ int main(int argc, char **argv, char **envp) opt_one_insn_per_tb, &error_abort); object_property_set_int(OBJECT(accel), "tb-size", opt_tb_size, &error_abort); - ac->init_machine(NULL); + ac->init_machine(accel, NULL); } /* diff --git a/migration/cpr.c b/migration/cpr.c index a50a57e..42ad0b0 100644 --- a/migration/cpr.c +++ b/migration/cpr.c @@ -7,25 +7,21 @@ #include "qemu/osdep.h" #include "qapi/error.h" +#include "hw/vfio/vfio-device.h" #include "migration/cpr.h" #include "migration/misc.h" #include "migration/options.h" #include "migration/qemu-file.h" #include "migration/savevm.h" #include "migration/vmstate.h" +#include "monitor/monitor.h" #include "system/runstate.h" #include "trace.h" /*************************************************************************/ /* cpr state container for all information to be saved. */ -typedef QLIST_HEAD(CprFdList, CprFd) CprFdList; - -typedef struct CprState { - CprFdList fds; -} CprState; - -static CprState cpr_state; +CprState cpr_state; /****************************************************************************/ @@ -126,8 +122,6 @@ int cpr_open_fd(const char *path, int flags, const char *name, int id, } /*************************************************************************/ -#define CPR_STATE "CprState" - static const VMStateDescription vmstate_cpr_state = { .name = CPR_STATE, .version_id = 1, @@ -135,6 +129,10 @@ static const VMStateDescription vmstate_cpr_state = { .fields = (VMStateField[]) { VMSTATE_QLIST_V(fds, CprState, 1, vmstate_cpr_fd, CprFd, next), VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription * const []) { + &vmstate_cpr_vfio_devices, + NULL } }; /*************************************************************************/ @@ -264,3 +262,39 @@ bool cpr_incoming_needed(void *opaque) MigMode mode = migrate_mode(); return mode == MIG_MODE_CPR_TRANSFER; } + +/* + * cpr_get_fd_param: find a descriptor and return its value. + * + * @name: CPR name for the descriptor + * @fdname: An integer-valued string, or a name passed to a getfd command + * @index: CPR index of the descriptor + * @errp: returned error message + * + * If CPR is not being performed, then use @fdname to find the fd. + * If CPR is being performed, then ignore @fdname, and look for @name + * and @index in CPR state. + * + * On success returns the fd value, else returns -1. + */ +int cpr_get_fd_param(const char *name, const char *fdname, int index, + Error **errp) +{ + ERRP_GUARD(); + int fd; + + if (cpr_is_incoming()) { + fd = cpr_find_fd(name, index); + if (fd < 0) { + error_setg(errp, "cannot find saved value for fd %s", fdname); + } + } else { + fd = monitor_fd_param(monitor_cur(), fdname, errp); + if (fd >= 0) { + cpr_save_fd(name, index, fd); + } else { + error_prepend(errp, "Could not parse object fd %s:", fdname); + } + } + return fd; +} diff --git a/monitor/hmp-cmds-target.c b/monitor/hmp-cmds-target.c index 8eaf70d..e982061 100644 --- a/monitor/hmp-cmds-target.c +++ b/monitor/hmp-cmds-target.c @@ -102,7 +102,7 @@ void hmp_info_registers(Monitor *mon, const QDict *qdict) if (all_cpus) { CPU_FOREACH(cs) { monitor_printf(mon, "\nCPU#%d\n", cs->cpu_index); - cpu_dump_state(cs, NULL, CPU_DUMP_FPU); + cpu_dump_state(cs, NULL, CPU_DUMP_FPU | CPU_DUMP_VPU); } } else { cs = vcpu >= 0 ? qemu_get_cpu(vcpu) : mon_get_cpu(mon); @@ -117,7 +117,7 @@ void hmp_info_registers(Monitor *mon, const QDict *qdict) } monitor_printf(mon, "\nCPU#%d\n", cs->cpu_index); - cpu_dump_state(cs, NULL, CPU_DUMP_FPU); + cpu_dump_state(cs, NULL, CPU_DUMP_FPU | CPU_DUMP_VPU); } } diff --git a/qapi/machine.json b/qapi/machine.json index 0650b8d..f712e7d 100644 --- a/qapi/machine.json +++ b/qapi/machine.json @@ -1762,24 +1762,6 @@ 'features': [ 'unstable' ] } ## -# @x-query-opcount: -# -# Query TCG opcode counters -# -# Features: -# -# @unstable: This command is meant for debugging. -# -# Returns: TCG opcode counters -# -# Since: 6.2 -## -{ 'command': 'x-query-opcount', - 'returns': 'HumanReadableText', - 'if': 'CONFIG_TCG', - 'features': [ 'unstable' ] } - -## # @x-query-ramblock: # # Query system ramblock information diff --git a/qapi/migration.json b/qapi/migration.json index 4963f6c..e8a7d3b 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -620,8 +620,10 @@ # # @cpr-transfer: This mode allows the user to transfer a guest to a # new QEMU instance on the same host with minimal guest pause -# time by preserving guest RAM in place. Devices and their pinned -# pages will also be preserved in a future QEMU release. +# time by preserving guest RAM in place. +# +# Devices and their pinned pages are also preserved for VFIO and +# IOMMUFD. (since 10.1) # # The user starts new QEMU on the same host as old QEMU, with # command-line arguments to create the same machine, plus the diff --git a/system/cpus.c b/system/cpus.c index d16b0df..0d0eec8 100644 --- a/system/cpus.c +++ b/system/cpus.c @@ -254,7 +254,7 @@ int64_t cpus_get_elapsed_ticks(void) return cpu_get_ticks(); } -static void generic_handle_interrupt(CPUState *cpu, int mask) +void generic_handle_interrupt(CPUState *cpu, int mask) { cpu->interrupt_request |= mask; @@ -265,11 +265,9 @@ static void generic_handle_interrupt(CPUState *cpu, int mask) void cpu_interrupt(CPUState *cpu, int mask) { - if (cpus_accel->handle_interrupt) { - cpus_accel->handle_interrupt(cpu, mask); - } else { - generic_handle_interrupt(cpu, mask); - } + g_assert(bql_locked()); + + cpus_accel->handle_interrupt(cpu, mask); } /* @@ -678,6 +676,8 @@ void cpus_register_accel(const AccelOpsClass *ops) { assert(ops != NULL); assert(ops->create_vcpu_thread != NULL); /* mandatory */ + assert(ops->handle_interrupt); + cpus_accel = ops; } diff --git a/system/memory.c b/system/memory.c index 76b44b8..e8d9b15 100644 --- a/system/memory.c +++ b/system/memory.c @@ -3501,7 +3501,7 @@ static void mtree_print_flatview(gpointer key, gpointer value, if (fvi->ac) { for (i = 0; i < fv_address_spaces->len; ++i) { as = g_array_index(fv_address_spaces, AddressSpace*, i); - if (fvi->ac->has_memory(current_machine, as, + if (fvi->ac->has_memory(current_machine->accelerator, as, int128_get64(range->addr.start), MR_SIZE(range->addr.size) + 1)) { qemu_printf(" %s", fvi->ac->name); diff --git a/system/physmem.c b/system/physmem.c index ff0ca40..130c148 100644 --- a/system/physmem.c +++ b/system/physmem.c @@ -1593,6 +1593,11 @@ ram_addr_t qemu_ram_get_offset(RAMBlock *rb) return rb->offset; } +ram_addr_t qemu_ram_get_fd_offset(RAMBlock *rb) +{ + return rb->fd_offset; +} + ram_addr_t qemu_ram_get_used_length(RAMBlock *rb) { return rb->used_length; diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c index 7b6d291..c9cfcdc 100644 --- a/target/arm/hvf/hvf.c +++ b/target/arm/hvf/hvf.c @@ -813,9 +813,9 @@ int hvf_put_registers(CPUState *cpu) static void flush_cpu_state(CPUState *cpu) { - if (cpu->accel->dirty) { + if (cpu->vcpu_dirty) { hvf_put_registers(cpu); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } } diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 99e37a3..818b504 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -733,9 +733,9 @@ int hvf_vcpu_exec(CPUState *cpu) } do { - if (cpu->accel->dirty) { + if (cpu->vcpu_dirty) { hvf_put_registers(cpu); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } if (hvf_inject_interrupts(cpu)) { diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c index 2057314..17fce1d 100644 --- a/target/i386/hvf/x86hvf.c +++ b/target/i386/hvf/x86hvf.c @@ -427,7 +427,7 @@ int hvf_process_events(CPUState *cs) X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; - if (!cs->accel->dirty) { + if (!cs->vcpu_dirty) { /* light weight sync for CPU_INTERRUPT_HARD and IF_MASK */ env->eflags = rreg(cs->accel->fd, HV_X86_RFLAGS); } diff --git a/target/i386/nvmm/nvmm-accel-ops.c b/target/i386/nvmm/nvmm-accel-ops.c index 2144307..a5517b0 100644 --- a/target/i386/nvmm/nvmm-accel-ops.c +++ b/target/i386/nvmm/nvmm-accel-ops.c @@ -87,6 +87,7 @@ static void nvmm_accel_ops_class_init(ObjectClass *oc, const void *data) ops->create_vcpu_thread = nvmm_start_vcpu_thread; ops->kick_vcpu_thread = nvmm_kick_vcpu_thread; + ops->handle_interrupt = generic_handle_interrupt; ops->synchronize_post_reset = nvmm_cpu_synchronize_post_reset; ops->synchronize_post_init = nvmm_cpu_synchronize_post_init; diff --git a/target/i386/nvmm/nvmm-all.c b/target/i386/nvmm/nvmm-all.c index f1c6120..b4a4d50 100644 --- a/target/i386/nvmm/nvmm-all.c +++ b/target/i386/nvmm/nvmm-all.c @@ -30,7 +30,6 @@ struct AccelCPUState { struct nvmm_vcpu vcpu; uint8_t tpr; bool stop; - bool dirty; /* Window-exiting for INTs/NMIs. */ bool int_window_exit; @@ -47,7 +46,7 @@ struct qemu_machine { /* -------------------------------------------------------------------------- */ -static bool nvmm_allowed; +bool nvmm_allowed; static struct qemu_machine qemu_mach; static struct nvmm_machine * @@ -508,7 +507,7 @@ nvmm_io_callback(struct nvmm_io *io) } /* Needed, otherwise infinite loop. */ - current_cpu->accel->dirty = false; + current_cpu->vcpu_dirty = false; } static void @@ -517,7 +516,7 @@ nvmm_mem_callback(struct nvmm_mem *mem) cpu_physical_memory_rw(mem->gpa, mem->data, mem->size, mem->write); /* Needed, otherwise infinite loop. */ - current_cpu->accel->dirty = false; + current_cpu->vcpu_dirty = false; } static struct nvmm_assist_callbacks nvmm_callbacks = { @@ -727,9 +726,9 @@ nvmm_vcpu_loop(CPUState *cpu) * Inner VCPU loop. */ do { - if (cpu->accel->dirty) { + if (cpu->vcpu_dirty) { nvmm_set_registers(cpu); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } if (qcpu->stop) { @@ -827,32 +826,32 @@ static void do_nvmm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) { nvmm_get_registers(cpu); - cpu->accel->dirty = true; + cpu->vcpu_dirty = true; } static void do_nvmm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg) { nvmm_set_registers(cpu); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } static void do_nvmm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg) { nvmm_set_registers(cpu); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } static void do_nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg) { - cpu->accel->dirty = true; + cpu->vcpu_dirty = true; } void nvmm_cpu_synchronize_state(CPUState *cpu) { - if (!cpu->accel->dirty) { + if (!cpu->vcpu_dirty) { run_on_cpu(cpu, do_nvmm_cpu_synchronize_state, RUN_ON_CPU_NULL); } } @@ -982,7 +981,7 @@ nvmm_init_vcpu(CPUState *cpu) } } - qcpu->dirty = true; + qcpu->vcpu_dirty = true; cpu->accel = qcpu; return 0; @@ -1153,7 +1152,7 @@ static struct RAMBlockNotifier nvmm_ram_notifier = { /* -------------------------------------------------------------------------- */ static int -nvmm_accel_init(MachineState *ms) +nvmm_accel_init(AccelState *as, MachineState *ms) { int ret, err; @@ -1193,12 +1192,6 @@ nvmm_accel_init(MachineState *ms) return 0; } -int -nvmm_enabled(void) -{ - return nvmm_allowed; -} - static void nvmm_accel_class_init(ObjectClass *oc, const void *data) { diff --git a/target/i386/whpx/whpx-accel-ops.c b/target/i386/whpx/whpx-accel-ops.c index b8bebe4..31cf15f 100644 --- a/target/i386/whpx/whpx-accel-ops.c +++ b/target/i386/whpx/whpx-accel-ops.c @@ -90,6 +90,7 @@ static void whpx_accel_ops_class_init(ObjectClass *oc, const void *data) ops->create_vcpu_thread = whpx_start_vcpu_thread; ops->kick_vcpu_thread = whpx_kick_vcpu_thread; ops->cpu_thread_is_idle = whpx_vcpu_thread_is_idle; + ops->handle_interrupt = generic_handle_interrupt; ops->synchronize_post_reset = whpx_cpu_synchronize_post_reset; ops->synchronize_post_init = whpx_cpu_synchronize_post_init; diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c index cf6d3e4..721c478 100644 --- a/target/i386/whpx/whpx-all.c +++ b/target/i386/whpx/whpx-all.c @@ -237,13 +237,12 @@ struct AccelCPUState { uint64_t tpr; uint64_t apic_base; bool interruption_pending; - bool dirty; /* Must be the last field as it may have a tail */ WHV_RUN_VP_EXIT_CONTEXT exit_ctx; }; -static bool whpx_allowed; +bool whpx_allowed; static bool whp_dispatch_initialized; static HMODULE hWinHvPlatform, hWinHvEmulation; static uint32_t max_vcpu_index; @@ -836,7 +835,7 @@ static HRESULT CALLBACK whpx_emu_setreg_callback( * The emulator just successfully wrote the register state. We clear the * dirty state so we avoid the double write on resume of the VP. */ - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; return hr; } @@ -1391,7 +1390,7 @@ static int whpx_last_vcpu_stopping(CPUState *cpu) /* Returns the address of the next instruction that is about to be executed. */ static vaddr whpx_vcpu_get_pc(CPUState *cpu, bool exit_context_valid) { - if (cpu->accel->dirty) { + if (cpu->vcpu_dirty) { /* The CPU registers have been modified by other parts of QEMU. */ return cpu_env(cpu)->eip; } else if (exit_context_valid) { @@ -1704,9 +1703,9 @@ static int whpx_vcpu_run(CPUState *cpu) } do { - if (cpu->accel->dirty) { + if (cpu->vcpu_dirty) { whpx_set_registers(cpu, WHPX_SET_RUNTIME_STATE); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } if (exclusive_step_mode == WHPX_STEP_NONE) { @@ -2054,9 +2053,9 @@ static int whpx_vcpu_run(CPUState *cpu) static void do_whpx_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) { - if (!cpu->accel->dirty) { + if (!cpu->vcpu_dirty) { whpx_get_registers(cpu); - cpu->accel->dirty = true; + cpu->vcpu_dirty = true; } } @@ -2064,20 +2063,20 @@ static void do_whpx_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg) { whpx_set_registers(cpu, WHPX_SET_RESET_STATE); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } static void do_whpx_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg) { whpx_set_registers(cpu, WHPX_SET_FULL_STATE); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg) { - cpu->accel->dirty = true; + cpu->vcpu_dirty = true; } /* @@ -2086,7 +2085,7 @@ static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu, void whpx_cpu_synchronize_state(CPUState *cpu) { - if (!cpu->accel->dirty) { + if (!cpu->vcpu_dirty) { run_on_cpu(cpu, do_whpx_cpu_synchronize_state, RUN_ON_CPU_NULL); } } @@ -2226,7 +2225,7 @@ int whpx_init_vcpu(CPUState *cpu) } vcpu->interruptable = true; - vcpu->dirty = true; + cpu->vcpu_dirty = true; cpu->accel = vcpu; max_vcpu_index = max(max_vcpu_index, cpu->cpu_index); qemu_add_vm_change_state_handler(whpx_cpu_update_state, env); @@ -2505,7 +2504,7 @@ static void whpx_set_kernel_irqchip(Object *obj, Visitor *v, * Partition support */ -static int whpx_accel_init(MachineState *ms) +static int whpx_accel_init(AccelState *as, MachineState *ms) { struct whpx_state *whpx; int ret; @@ -2689,11 +2688,6 @@ error: return ret; } -int whpx_enabled(void) -{ - return whpx_allowed; -} - bool whpx_apic_in_platform(void) { return whpx_global.apic_in_platform; } diff --git a/tests/functional/aspeed.py b/tests/functional/aspeed.py index 7a40d5d..b131703 100644 --- a/tests/functional/aspeed.py +++ b/tests/functional/aspeed.py @@ -8,8 +8,13 @@ from qemu_test import LinuxKernelTest class AspeedTest(LinuxKernelTest): def do_test_arm_aspeed_openbmc(self, machine, image, uboot='2019.04', - cpu_id='0x0', soc='AST2500 rev A1'): - hostname = machine.removesuffix('-bmc') + cpu_id='0x0', soc='AST2500 rev A1', + image_hostname=None): + # Allow for the image hostname to not end in "-bmc" + if image_hostname is not None: + hostname = image_hostname + else: + hostname = machine.removesuffix('-bmc') self.set_machine(machine) self.vm.set_console() diff --git a/tests/functional/meson.build b/tests/functional/meson.build index b542b3a..050c900 100644 --- a/tests/functional/meson.build +++ b/tests/functional/meson.build @@ -32,6 +32,8 @@ test_timeouts = { 'arm_aspeed_ast2500' : 720, 'arm_aspeed_ast2600' : 1200, 'arm_aspeed_bletchley' : 480, + 'arm_aspeed_catalina' : 480, + 'arm_aspeed_gb200nvl_bmc' : 480, 'arm_aspeed_rainier' : 480, 'arm_bpim2u' : 500, 'arm_collie' : 180, @@ -127,6 +129,8 @@ tests_arm_system_thorough = [ 'arm_aspeed_ast2500', 'arm_aspeed_ast2600', 'arm_aspeed_bletchley', + 'arm_aspeed_catalina', + 'arm_aspeed_gb200nvl_bmc', 'arm_aspeed_rainier', 'arm_bpim2u', 'arm_canona1100', diff --git a/tests/functional/test_arm_aspeed_catalina.py b/tests/functional/test_arm_aspeed_catalina.py new file mode 100755 index 0000000..dc2f24e --- /dev/null +++ b/tests/functional/test_arm_aspeed_catalina.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python3 +# +# Functional test that boots the ASPEED machines +# +# SPDX-License-Identifier: GPL-2.0-or-later + +from qemu_test import Asset +from aspeed import AspeedTest + + +class CatalinaMachine(AspeedTest): + + ASSET_CATALINA_FLASH = Asset( + 'https://github.com/legoater/qemu-aspeed-boot/raw/a866feb5ef81245b4827a214584bf6bcc72939f6/images/catalina-bmc/obmc-phosphor-image-catalina-20250619123021.static.mtd.xz', + '287402e1ba021991e06be1d098f509444a02a3d81a73a932f66528b159e864f9') + + def test_arm_ast2600_catalina_openbmc(self): + image_path = self.uncompress(self.ASSET_CATALINA_FLASH) + + self.do_test_arm_aspeed_openbmc('catalina-bmc', image=image_path, + uboot='2019.04', cpu_id='0xf00', + soc='AST2600 rev A3') + +if __name__ == '__main__': + AspeedTest.main() diff --git a/tests/functional/test_arm_aspeed_gb200nvl_bmc.py b/tests/functional/test_arm_aspeed_gb200nvl_bmc.py new file mode 100644 index 0000000..8e8e3f0 --- /dev/null +++ b/tests/functional/test_arm_aspeed_gb200nvl_bmc.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python3 +# +# Functional test that boots the ASPEED machines +# +# SPDX-License-Identifier: GPL-2.0-or-later + +from qemu_test import Asset +from aspeed import AspeedTest + + +class GB200Machine(AspeedTest): + + ASSET_GB200_FLASH = Asset( + 'https://github.com/legoater/qemu-aspeed-boot/raw/refs/heads/master/images/gb200nvl-obmc/obmc-phosphor-image-gb200nvl-obmc-20250702182348.static.mtd.xz', + 'b84819317cb3dc762895ad507705978ef000bfc77c50c33a63bdd37921db0dbc') + + def test_arm_aspeed_gb200_openbmc(self): + image_path = self.uncompress(self.ASSET_GB200_FLASH) + + self.do_test_arm_aspeed_openbmc('gb200nvl-bmc', image=image_path, + uboot='2019.04', cpu_id='0xf00', + soc='AST2600 rev A3', + image_hostname='gb200nvl-obmc') + +if __name__ == '__main__': + AspeedTest.main() diff --git a/tests/qtest/aspeed_scu-test.c b/tests/qtest/aspeed_scu-test.c new file mode 100644 index 0000000..ca09f91 --- /dev/null +++ b/tests/qtest/aspeed_scu-test.c @@ -0,0 +1,231 @@ +/* + * QTest testcase for the ASPEED AST2500 and AST2600 SCU. + * + * SPDX-License-Identifier: GPL-2.0-or-later + * Copyright (C) 2025 Tan Siewert + */ + +#include "qemu/osdep.h" +#include "libqtest-single.h" + +/* + * SCU base, as well as protection key are + * the same on AST2500 and 2600. + */ +#define AST_SCU_BASE 0x1E6E2000 +#define AST_SCU_PROT_LOCK_STATE 0x0 +#define AST_SCU_PROT_LOCK_VALUE 0x2 +#define AST_SCU_PROT_UNLOCK_STATE 0x1 +#define AST_SCU_PROT_UNLOCK_VALUE 0x1688A8A8 + +#define AST2500_MACHINE "-machine ast2500-evb" +#define AST2500_SCU_PROT_REG 0x00 +#define AST2500_SCU_MISC_2_CONTROL_REG 0x4C + +#define AST2600_MACHINE "-machine ast2600-evb" +/* AST2600 has two protection registers */ +#define AST2600_SCU_PROT_REG 0x000 +#define AST2600_SCU_PROT_REG2 0x010 +#define AST2600_SCU_MISC_2_CONTROL_REG 0x0C4 + +#define TEST_LOCK_ARBITRARY_VALUE 0xABCDEFAB + +/** + * Assert that a given register matches an expected value. + * + * Reads the register and checks if its value equals the expected value. + * + * @param *s - QTest machine state + * @param reg - Address of the register to be checked + * @param expected - Expected register value + */ +static inline void assert_register_eq(QTestState *s, + uint32_t reg, + uint32_t expected) +{ + uint32_t value = qtest_readl(s, reg); + g_assert_cmphex(value, ==, expected); +} + +/** + * Assert that a given register does not match a specific value. + * + * Reads the register and checks that its value is not equal to the + * provided value. + * + * @param *s - QTest machine state + * @param reg - Address of the register to be checked + * @param not_expected - Value the register must not contain + */ +static inline void assert_register_neq(QTestState *s, + uint32_t reg, + uint32_t not_expected) +{ + uint32_t value = qtest_readl(s, reg); + g_assert_cmphex(value, !=, not_expected); +} + +/** + * Test whether the SCU can be locked and unlocked correctly. + * + * When testing multiple registers, this function assumes that writing + * to the first register also affects the others. However, writing to + * any other register only affects itself. + * + * @param *machine - input machine configuration, passed directly + * to QTest + * @param regs[] - List of registers to be checked + * @param regc - amount of arguments for registers to be checked + */ +static void test_protection_register(const char *machine, + const uint32_t regs[], + const int regc) +{ + QTestState *s = qtest_init(machine); + + for (int i = 0; i < regc; i++) { + uint32_t reg = regs[i]; + + qtest_writel(s, reg, AST_SCU_PROT_UNLOCK_VALUE); + assert_register_eq(s, reg, AST_SCU_PROT_UNLOCK_STATE); + + /** + * Check that other registers are unlocked too, if more + * than one is available. + */ + if (regc > 1 && i == 0) { + /* Initialise at 1 instead of 0 to skip first */ + for (int j = 1; j < regc; j++) { + uint32_t add_reg = regs[j]; + assert_register_eq(s, add_reg, AST_SCU_PROT_UNLOCK_STATE); + } + } + + /* Lock the register again */ + qtest_writel(s, reg, AST_SCU_PROT_LOCK_VALUE); + assert_register_eq(s, reg, AST_SCU_PROT_LOCK_STATE); + + /* And the same for locked state */ + if (regc > 1 && i == 0) { + /* Initialise at 1 instead of 0 to skip first */ + for (int j = 1; j < regc; j++) { + uint32_t add_reg = regs[j]; + assert_register_eq(s, add_reg, AST_SCU_PROT_LOCK_STATE); + } + } + } + + qtest_quit(s); +} + +static void test_2500_protection_register(void) +{ + uint32_t regs[] = { AST_SCU_BASE + AST2500_SCU_PROT_REG }; + + test_protection_register(AST2500_MACHINE, + regs, + ARRAY_SIZE(regs)); +} + +static void test_2600_protection_register(void) +{ + /** + * The AST2600 has two protection registers, both + * being required to be unlocked to do any operation. + * + * Modifying SCU000 also modifies SCU010, but modifying + * SCU010 only will keep SCU000 untouched. + */ + uint32_t regs[] = { AST_SCU_BASE + AST2600_SCU_PROT_REG, + AST_SCU_BASE + AST2600_SCU_PROT_REG2 }; + + test_protection_register(AST2600_MACHINE, + regs, + ARRAY_SIZE(regs)); +} + +/** + * Test if SCU register writes are correctly allowed or blocked + * depending on the protection register state. + * + * The test first locks the protection register and verifies that + * writes to the target SCU register are rejected. It then unlocks + * the protection register and confirms that the written value is + * retained when unlocked. + * + * @param *machine - input machine configuration, passed directly + * to QTest + * @param protection_register - first SCU protection key register + * (only one for keeping it simple) + * @param test_register - Register to be used for writing arbitrary + * values + */ +static void test_write_permission_lock_state(const char *machine, + const uint32_t protection_register, + const uint32_t test_register) +{ + QTestState *s = qtest_init(machine); + + /* Arbitrary value to lock provided SCU protection register */ + qtest_writel(s, protection_register, AST_SCU_PROT_LOCK_VALUE); + + /* Ensure that the SCU is really locked */ + assert_register_eq(s, protection_register, AST_SCU_PROT_LOCK_STATE); + + /* Write a known arbitrary value to test that the write is blocked */ + qtest_writel(s, test_register, TEST_LOCK_ARBITRARY_VALUE); + + /* We do not want to have the written value to be saved */ + assert_register_neq(s, test_register, TEST_LOCK_ARBITRARY_VALUE); + + /** + * Unlock the SCU and verify that it can be written to. + * Assumes that the first SCU protection register is sufficient to + * unlock all protection registers, if multiple are present. + */ + qtest_writel(s, protection_register, AST_SCU_PROT_UNLOCK_VALUE); + assert_register_eq(s, protection_register, AST_SCU_PROT_UNLOCK_STATE); + + /* Write a known arbitrary value to test that the write works */ + qtest_writel(s, test_register, TEST_LOCK_ARBITRARY_VALUE); + + /* Ensure that the written value is retained */ + assert_register_eq(s, test_register, TEST_LOCK_ARBITRARY_VALUE); + + qtest_quit(s); +} + +static void test_2500_write_permission_lock_state(void) +{ + test_write_permission_lock_state( + AST2500_MACHINE, + AST_SCU_BASE + AST2500_SCU_PROT_REG, + AST_SCU_BASE + AST2500_SCU_MISC_2_CONTROL_REG + ); +} + +static void test_2600_write_permission_lock_state(void) +{ + test_write_permission_lock_state( + AST2600_MACHINE, + AST_SCU_BASE + AST2600_SCU_PROT_REG, + AST_SCU_BASE + AST2600_SCU_MISC_2_CONTROL_REG + ); +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + + qtest_add_func("/ast2500/scu/protection_register", + test_2500_protection_register); + qtest_add_func("/ast2600/scu/protection_register", + test_2600_protection_register); + + qtest_add_func("/ast2500/scu/write_permission_lock_state", + test_2500_write_permission_lock_state); + qtest_add_func("/ast2600/scu/write_permission_lock_state", + test_2600_write_permission_lock_state); + + return g_test_run(); +} diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build index 8ad8490..91b4a71 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build @@ -215,6 +215,7 @@ qtests_npcm8xx = \ qtests_aspeed = \ ['aspeed_gpio-test', 'aspeed_hace-test', + 'aspeed_scu-test', 'aspeed_smc-test'] qtests_aspeed64 = \ ['ast2700-gpio-test', diff --git a/tests/qtest/qmp-cmd-test.c b/tests/qtest/qmp-cmd-test.c index 040d042..cf71876 100644 --- a/tests/qtest/qmp-cmd-test.c +++ b/tests/qtest/qmp-cmd-test.c @@ -51,7 +51,6 @@ static int query_error_class(const char *cmd) { "x-query-usb", ERROR_CLASS_GENERIC_ERROR }, /* Only valid with accel=tcg */ { "x-query-jit", ERROR_CLASS_GENERIC_ERROR }, - { "x-query-opcount", ERROR_CLASS_GENERIC_ERROR }, { "xen-event-list", ERROR_CLASS_GENERIC_ERROR }, { NULL, -1 } }; |