diff options
131 files changed, 3237 insertions, 844 deletions
@@ -11,4 +11,3 @@ prep-perpatch-check-cmd = scripts/checkpatch.pl -q --terse --no-summary --mailback - searchmask = https://lore.kernel.org/qemu-devel/?x=m&t=1&q=%s linkmask = https://lore.kernel.org/qemu-devel/%s - linktrailermask = Message-ID: <%s> diff --git a/MAINTAINERS b/MAINTAINERS index b1cbfe1..1842c3d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -495,6 +495,7 @@ Guest CPU Cores (other accelerators) Overall M: Richard Henderson <richard.henderson@linaro.org> R: Paolo Bonzini <pbonzini@redhat.com> +R: Philippe Mathieu-Daudé <philmd@linaro.org> S: Maintained F: include/exec/cpu*.h F: include/exec/target_long.h @@ -503,6 +504,7 @@ F: include/system/accel-*.h F: include/system/cpus.h F: include/accel/accel-cpu*.h F: accel/accel-*.? +F: accel/dummy-cpus.? F: accel/Makefile.objs F: accel/stubs/Makefile.objs F: cpu-common.c @@ -540,6 +542,7 @@ WHPX CPUs M: Sunil Muthuswamy <sunilmut@microsoft.com> S: Supported F: target/i386/whpx/ +F: accel/stubs/whpx-stub.c F: include/system/whpx.h X86 Instruction Emulator @@ -586,6 +589,7 @@ NetBSD Virtual Machine Monitor (NVMM) CPU support M: Reinoud Zandijk <reinoud@netbsd.org> S: Maintained F: include/system/nvmm.h +F: accel/stubs/nvmm-stub.c F: target/i386/nvmm/ Hosts @@ -1697,6 +1701,13 @@ S: Maintained F: hw/riscv/microblaze-v-generic.c F: docs/system/riscv/microblaze-v-generic.rst +Xiangshan Kunminghu +M: Ran Wang <wangran@bosc.ac.cn> +S: Maintained +F: docs/system/riscv/xiangshan-kunminghu.rst +F: hw/riscv/xiangshan_kmh.c +F: include/hw/riscv/xiangshan_kmh.h + RX Machines ----------- rx-gdbsim @@ -227,6 +227,7 @@ distclean: clean recurse-distclean rm -Rf .sdk qemu-bundle find-src-path = find "$(SRC_PATH)" -path "$(SRC_PATH)/meson" -prune -o \ + -path "$(SRC_PATH)/.pc" -prune -o \ -type l -prune -o \( -name "*.[chsS]" -o -name "*.[ch].inc" \) .PHONY: ctags diff --git a/accel/accel-common.c b/accel/accel-common.c index 4894b98..591ff4c 100644 --- a/accel/accel-common.c +++ b/accel/accel-common.c @@ -124,7 +124,7 @@ int 
accel_supported_gdbstub_sstep_flags(void) AccelState *accel = current_accel(); AccelClass *acc = ACCEL_GET_CLASS(accel); if (acc->gdbstub_supported_sstep_flags) { - return acc->gdbstub_supported_sstep_flags(); + return acc->gdbstub_supported_sstep_flags(accel); } return 0; } diff --git a/accel/accel-system.c b/accel/accel-system.c index a0f562a..c54c30f 100644 --- a/accel/accel-system.c +++ b/accel/accel-system.c @@ -37,7 +37,7 @@ int accel_init_machine(AccelState *accel, MachineState *ms) int ret; ms->accelerator = accel; *(acc->allowed) = true; - ret = acc->init_machine(ms); + ret = acc->init_machine(accel, ms); if (ret < 0) { ms->accelerator = NULL; *(acc->allowed) = false; @@ -58,7 +58,16 @@ void accel_setup_post(MachineState *ms) AccelState *accel = ms->accelerator; AccelClass *acc = ACCEL_GET_CLASS(accel); if (acc->setup_post) { - acc->setup_post(ms, accel); + acc->setup_post(accel); + } +} + +void accel_pre_resume(MachineState *ms, bool step_pending) +{ + AccelState *accel = ms->accelerator; + AccelClass *acc = ACCEL_GET_CLASS(accel); + if (acc->pre_resume_vm) { + acc->pre_resume_vm(accel, step_pending); } } @@ -85,8 +94,9 @@ void accel_init_ops_interfaces(AccelClass *ac) * non-NULL create_vcpu_thread operation. */ ops = ACCEL_OPS_CLASS(oc); + ac->ops = ops; if (ops->ops_init) { - ops->ops_init(ops); + ops->ops_init(ac); } cpus_register_accel(ops); } diff --git a/accel/dummy-cpus.c b/accel/dummy-cpus.c index 8672761..03cfc0f 100644 --- a/accel/dummy-cpus.c +++ b/accel/dummy-cpus.c @@ -17,6 +17,7 @@ #include "qemu/guest-random.h" #include "qemu/main-loop.h" #include "hw/core/cpu.h" +#include "accel/dummy-cpus.h" static void *dummy_cpu_thread_fn(void *arg) { diff --git a/accel/dummy-cpus.h b/accel/dummy-cpus.h new file mode 100644 index 0000000..d18dd0f --- /dev/null +++ b/accel/dummy-cpus.h @@ -0,0 +1,14 @@ +/* + * Dummy cpu thread code + * + * Copyright IBM, Corp. 
2011 + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef ACCEL_DUMMY_CPUS_H +#define ACCEL_DUMMY_CPUS_H + +void dummy_start_vcpu_thread(CPUState *cpu); + +#endif diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c index b389772..be8724a 100644 --- a/accel/hvf/hvf-accel-ops.c +++ b/accel/hvf/hvf-accel-ops.c @@ -48,18 +48,16 @@ */ #include "qemu/osdep.h" -#include "qemu/error-report.h" +#include "qemu/guest-random.h" #include "qemu/main-loop.h" -#include "system/address-spaces.h" +#include "qemu/queue.h" #include "gdbstub/enums.h" -#include "hw/boards.h" +#include "exec/cpu-common.h" +#include "hw/core/cpu.h" #include "system/accel-ops.h" #include "system/cpus.h" #include "system/hvf.h" #include "system/hvf_int.h" -#include "system/runstate.h" -#include "qemu/guest-random.h" -#include "trace.h" HVFState *hvf_state; @@ -79,143 +77,17 @@ hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t size) return NULL; } -struct mac_slot { - int present; - uint64_t size; - uint64_t gpa_start; - uint64_t gva; -}; - -struct mac_slot mac_slots[32]; - -static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags) -{ - struct mac_slot *macslot; - hv_return_t ret; - - macslot = &mac_slots[slot->slot_id]; - - if (macslot->present) { - if (macslot->size != slot->size) { - macslot->present = 0; - trace_hvf_vm_unmap(macslot->gpa_start, macslot->size); - ret = hv_vm_unmap(macslot->gpa_start, macslot->size); - assert_hvf_ok(ret); - } - } - - if (!slot->size) { - return 0; - } - - macslot->present = 1; - macslot->gpa_start = slot->start; - macslot->size = slot->size; - trace_hvf_vm_map(slot->start, slot->size, slot->mem, flags, - flags & HV_MEMORY_READ ? 'R' : '-', - flags & HV_MEMORY_WRITE ? 'W' : '-', - flags & HV_MEMORY_EXEC ? 
'E' : '-'); - ret = hv_vm_map(slot->mem, slot->start, slot->size, flags); - assert_hvf_ok(ret); - return 0; -} - -static void hvf_set_phys_mem(MemoryRegionSection *section, bool add) -{ - hvf_slot *mem; - MemoryRegion *area = section->mr; - bool writable = !area->readonly && !area->rom_device; - hv_memory_flags_t flags; - uint64_t page_size = qemu_real_host_page_size(); - - if (!memory_region_is_ram(area)) { - if (writable) { - return; - } else if (!memory_region_is_romd(area)) { - /* - * If the memory device is not in romd_mode, then we actually want - * to remove the hvf memory slot so all accesses will trap. - */ - add = false; - } - } - - if (!QEMU_IS_ALIGNED(int128_get64(section->size), page_size) || - !QEMU_IS_ALIGNED(section->offset_within_address_space, page_size)) { - /* Not page aligned, so we can not map as RAM */ - add = false; - } - - mem = hvf_find_overlap_slot( - section->offset_within_address_space, - int128_get64(section->size)); - - if (mem && add) { - if (mem->size == int128_get64(section->size) && - mem->start == section->offset_within_address_space && - mem->mem == (memory_region_get_ram_ptr(area) + - section->offset_within_region)) { - return; /* Same region was attempted to register, go away. */ - } - } - - /* Region needs to be reset. set the size to 0 and remap it. */ - if (mem) { - mem->size = 0; - if (do_hvf_set_memory(mem, 0)) { - error_report("Failed to reset overlapping slot"); - abort(); - } - } - - if (!add) { - return; - } - - if (area->readonly || - (!memory_region_is_ram(area) && memory_region_is_romd(area))) { - flags = HV_MEMORY_READ | HV_MEMORY_EXEC; - } else { - flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC; - } - - /* Now make a new slot. 
*/ - int x; - - for (x = 0; x < hvf_state->num_slots; ++x) { - mem = &hvf_state->slots[x]; - if (!mem->size) { - break; - } - } - - if (x == hvf_state->num_slots) { - error_report("No free slots"); - abort(); - } - - mem->size = int128_get64(section->size); - mem->mem = memory_region_get_ram_ptr(area) + section->offset_within_region; - mem->start = section->offset_within_address_space; - mem->region = area; - - if (do_hvf_set_memory(mem, flags)) { - error_report("Error registering new memory slot"); - abort(); - } -} - static void do_hvf_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) { - if (!cpu->accel->dirty) { + if (!cpu->vcpu_dirty) { hvf_get_registers(cpu); - cpu->accel->dirty = true; + cpu->vcpu_dirty = true; } } static void hvf_cpu_synchronize_state(CPUState *cpu) { - if (!cpu->accel->dirty) { + if (!cpu->vcpu_dirty) { run_on_cpu(cpu, do_hvf_cpu_synchronize_state, RUN_ON_CPU_NULL); } } @@ -224,7 +96,7 @@ static void do_hvf_cpu_synchronize_set_dirty(CPUState *cpu, run_on_cpu_data arg) { /* QEMU state is the reference, push it to HVF now and on next entry */ - cpu->accel->dirty = true; + cpu->vcpu_dirty = true; } static void hvf_cpu_synchronize_post_reset(CPUState *cpu) @@ -242,147 +114,10 @@ static void hvf_cpu_synchronize_pre_loadvm(CPUState *cpu) run_on_cpu(cpu, do_hvf_cpu_synchronize_set_dirty, RUN_ON_CPU_NULL); } -static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on) -{ - hvf_slot *slot; - - slot = hvf_find_overlap_slot( - section->offset_within_address_space, - int128_get64(section->size)); - - /* protect region against writes; begin tracking it */ - if (on) { - slot->flags |= HVF_SLOT_LOG; - hv_vm_protect((uintptr_t)slot->start, (size_t)slot->size, - HV_MEMORY_READ | HV_MEMORY_EXEC); - /* stop tracking region*/ - } else { - slot->flags &= ~HVF_SLOT_LOG; - hv_vm_protect((uintptr_t)slot->start, (size_t)slot->size, - HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC); - } -} - -static void hvf_log_start(MemoryListener 
*listener, - MemoryRegionSection *section, int old, int new) -{ - if (old != 0) { - return; - } - - hvf_set_dirty_tracking(section, 1); -} - -static void hvf_log_stop(MemoryListener *listener, - MemoryRegionSection *section, int old, int new) -{ - if (new != 0) { - return; - } - - hvf_set_dirty_tracking(section, 0); -} - -static void hvf_log_sync(MemoryListener *listener, - MemoryRegionSection *section) -{ - /* - * sync of dirty pages is handled elsewhere; just make sure we keep - * tracking the region. - */ - hvf_set_dirty_tracking(section, 1); -} - -static void hvf_region_add(MemoryListener *listener, - MemoryRegionSection *section) -{ - hvf_set_phys_mem(section, true); -} - -static void hvf_region_del(MemoryListener *listener, - MemoryRegionSection *section) -{ - hvf_set_phys_mem(section, false); -} - -static MemoryListener hvf_memory_listener = { - .name = "hvf", - .priority = MEMORY_LISTENER_PRIORITY_ACCEL, - .region_add = hvf_region_add, - .region_del = hvf_region_del, - .log_start = hvf_log_start, - .log_stop = hvf_log_stop, - .log_sync = hvf_log_sync, -}; - static void dummy_signal(int sig) { } -bool hvf_allowed; - -static int hvf_accel_init(MachineState *ms) -{ - int x; - hv_return_t ret; - HVFState *s; - int pa_range = 36; - MachineClass *mc = MACHINE_GET_CLASS(ms); - - if (mc->hvf_get_physical_address_range) { - pa_range = mc->hvf_get_physical_address_range(ms); - if (pa_range < 0) { - return -EINVAL; - } - } - - ret = hvf_arch_vm_create(ms, (uint32_t)pa_range); - assert_hvf_ok(ret); - - s = g_new0(HVFState, 1); - - s->num_slots = ARRAY_SIZE(s->slots); - for (x = 0; x < s->num_slots; ++x) { - s->slots[x].size = 0; - s->slots[x].slot_id = x; - } - - QTAILQ_INIT(&s->hvf_sw_breakpoints); - - hvf_state = s; - memory_listener_register(&hvf_memory_listener, &address_space_memory); - - return hvf_arch_init(); -} - -static inline int hvf_gdbstub_sstep_flags(void) -{ - return SSTEP_ENABLE | SSTEP_NOIRQ; -} - -static void hvf_accel_class_init(ObjectClass *oc, 
const void *data) -{ - AccelClass *ac = ACCEL_CLASS(oc); - ac->name = "HVF"; - ac->init_machine = hvf_accel_init; - ac->allowed = &hvf_allowed; - ac->gdbstub_supported_sstep_flags = hvf_gdbstub_sstep_flags; -} - -static const TypeInfo hvf_accel_type = { - .name = TYPE_HVF_ACCEL, - .parent = TYPE_ACCEL, - .instance_size = sizeof(HVFState), - .class_init = hvf_accel_class_init, -}; - -static void hvf_type_init(void) -{ - type_register_static(&hvf_accel_type); -} - -type_init(hvf_type_init); - static void hvf_vcpu_destroy(CPUState *cpu) { hv_return_t ret = hv_vcpu_destroy(cpu->accel->fd); @@ -415,8 +150,8 @@ static int hvf_init_vcpu(CPUState *cpu) #else r = hv_vcpu_create(&cpu->accel->fd, HV_VCPU_DEFAULT); #endif - cpu->accel->dirty = true; assert_hvf_ok(r); + cpu->vcpu_dirty = true; cpu->accel->guest_debug_enabled = false; @@ -482,6 +217,34 @@ static void hvf_start_vcpu_thread(CPUState *cpu) cpu, QEMU_THREAD_JOINABLE); } +struct hvf_sw_breakpoint *hvf_find_sw_breakpoint(CPUState *cpu, vaddr pc) +{ + struct hvf_sw_breakpoint *bp; + + QTAILQ_FOREACH(bp, &hvf_state->hvf_sw_breakpoints, entry) { + if (bp->pc == pc) { + return bp; + } + } + return NULL; +} + +int hvf_sw_breakpoints_active(CPUState *cpu) +{ + return !QTAILQ_EMPTY(&hvf_state->hvf_sw_breakpoints); +} + +static void do_hvf_update_guest_debug(CPUState *cpu, run_on_cpu_data arg) +{ + hvf_arch_update_guest_debug(cpu); +} + +int hvf_update_guest_debug(CPUState *cpu) +{ + run_on_cpu(cpu, do_hvf_update_guest_debug, RUN_ON_CPU_NULL); + return 0; +} + static int hvf_insert_breakpoint(CPUState *cpu, int type, vaddr addr, vaddr len) { struct hvf_sw_breakpoint *bp; @@ -590,6 +353,7 @@ static void hvf_accel_ops_class_init(ObjectClass *oc, const void *data) ops->create_vcpu_thread = hvf_start_vcpu_thread; ops->kick_vcpu_thread = hvf_kick_vcpu_thread; + ops->handle_interrupt = generic_handle_interrupt; ops->synchronize_post_reset = hvf_cpu_synchronize_post_reset; ops->synchronize_post_init = hvf_cpu_synchronize_post_init; 
@@ -609,8 +373,10 @@ static const TypeInfo hvf_accel_ops_type = { .class_init = hvf_accel_ops_class_init, .abstract = true, }; + static void hvf_accel_ops_register_types(void) { type_register_static(&hvf_accel_ops_type); } + type_init(hvf_accel_ops_register_types); diff --git a/accel/hvf/hvf-all.c b/accel/hvf/hvf-all.c index 8c387fd..1fa07c8 100644 --- a/accel/hvf/hvf-all.c +++ b/accel/hvf/hvf-all.c @@ -10,9 +10,24 @@ #include "qemu/osdep.h" #include "qemu/error-report.h" +#include "system/address-spaces.h" +#include "system/memory.h" #include "system/hvf.h" #include "system/hvf_int.h" #include "hw/core/cpu.h" +#include "hw/boards.h" +#include "trace.h" + +bool hvf_allowed; + +struct mac_slot { + int present; + uint64_t size; + uint64_t gpa_start; + uint64_t gva; +}; + +struct mac_slot mac_slots[32]; const char *hvf_return_string(hv_return_t ret) { @@ -42,30 +57,257 @@ void assert_hvf_ok_impl(hv_return_t ret, const char *file, unsigned int line, abort(); } -struct hvf_sw_breakpoint *hvf_find_sw_breakpoint(CPUState *cpu, vaddr pc) +static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags) { - struct hvf_sw_breakpoint *bp; + struct mac_slot *macslot; + hv_return_t ret; - QTAILQ_FOREACH(bp, &hvf_state->hvf_sw_breakpoints, entry) { - if (bp->pc == pc) { - return bp; + macslot = &mac_slots[slot->slot_id]; + + if (macslot->present) { + if (macslot->size != slot->size) { + macslot->present = 0; + trace_hvf_vm_unmap(macslot->gpa_start, macslot->size); + ret = hv_vm_unmap(macslot->gpa_start, macslot->size); + assert_hvf_ok(ret); } } - return NULL; + + if (!slot->size) { + return 0; + } + + macslot->present = 1; + macslot->gpa_start = slot->start; + macslot->size = slot->size; + trace_hvf_vm_map(slot->start, slot->size, slot->mem, flags, + flags & HV_MEMORY_READ ? 'R' : '-', + flags & HV_MEMORY_WRITE ? 'W' : '-', + flags & HV_MEMORY_EXEC ? 
'E' : '-'); + ret = hv_vm_map(slot->mem, slot->start, slot->size, flags); + assert_hvf_ok(ret); + return 0; } -int hvf_sw_breakpoints_active(CPUState *cpu) +static void hvf_set_phys_mem(MemoryRegionSection *section, bool add) { - return !QTAILQ_EMPTY(&hvf_state->hvf_sw_breakpoints); + hvf_slot *mem; + MemoryRegion *area = section->mr; + bool writable = !area->readonly && !area->rom_device; + hv_memory_flags_t flags; + uint64_t page_size = qemu_real_host_page_size(); + + if (!memory_region_is_ram(area)) { + if (writable) { + return; + } else if (!memory_region_is_romd(area)) { + /* + * If the memory device is not in romd_mode, then we actually want + * to remove the hvf memory slot so all accesses will trap. + */ + add = false; + } + } + + if (!QEMU_IS_ALIGNED(int128_get64(section->size), page_size) || + !QEMU_IS_ALIGNED(section->offset_within_address_space, page_size)) { + /* Not page aligned, so we can not map as RAM */ + add = false; + } + + mem = hvf_find_overlap_slot( + section->offset_within_address_space, + int128_get64(section->size)); + + if (mem && add) { + if (mem->size == int128_get64(section->size) && + mem->start == section->offset_within_address_space && + mem->mem == (memory_region_get_ram_ptr(area) + + section->offset_within_region)) { + return; /* Same region was attempted to register, go away. */ + } + } + + /* Region needs to be reset. set the size to 0 and remap it. */ + if (mem) { + mem->size = 0; + if (do_hvf_set_memory(mem, 0)) { + error_report("Failed to reset overlapping slot"); + abort(); + } + } + + if (!add) { + return; + } + + if (area->readonly || + (!memory_region_is_ram(area) && memory_region_is_romd(area))) { + flags = HV_MEMORY_READ | HV_MEMORY_EXEC; + } else { + flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC; + } + + /* Now make a new slot. 
*/ + int x; + + for (x = 0; x < hvf_state->num_slots; ++x) { + mem = &hvf_state->slots[x]; + if (!mem->size) { + break; + } + } + + if (x == hvf_state->num_slots) { + error_report("No free slots"); + abort(); + } + + mem->size = int128_get64(section->size); + mem->mem = memory_region_get_ram_ptr(area) + section->offset_within_region; + mem->start = section->offset_within_address_space; + mem->region = area; + + if (do_hvf_set_memory(mem, flags)) { + error_report("Error registering new memory slot"); + abort(); + } } -static void do_hvf_update_guest_debug(CPUState *cpu, run_on_cpu_data arg) +static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on) { - hvf_arch_update_guest_debug(cpu); + hvf_slot *slot; + + slot = hvf_find_overlap_slot( + section->offset_within_address_space, + int128_get64(section->size)); + + /* protect region against writes; begin tracking it */ + if (on) { + slot->flags |= HVF_SLOT_LOG; + hv_vm_protect((uintptr_t)slot->start, (size_t)slot->size, + HV_MEMORY_READ | HV_MEMORY_EXEC); + /* stop tracking region*/ + } else { + slot->flags &= ~HVF_SLOT_LOG; + hv_vm_protect((uintptr_t)slot->start, (size_t)slot->size, + HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC); + } } -int hvf_update_guest_debug(CPUState *cpu) +static void hvf_log_start(MemoryListener *listener, + MemoryRegionSection *section, int old, int new) { - run_on_cpu(cpu, do_hvf_update_guest_debug, RUN_ON_CPU_NULL); - return 0; + if (old != 0) { + return; + } + + hvf_set_dirty_tracking(section, 1); } + +static void hvf_log_stop(MemoryListener *listener, + MemoryRegionSection *section, int old, int new) +{ + if (new != 0) { + return; + } + + hvf_set_dirty_tracking(section, 0); +} + +static void hvf_log_sync(MemoryListener *listener, + MemoryRegionSection *section) +{ + /* + * sync of dirty pages is handled elsewhere; just make sure we keep + * tracking the region. 
+ */ + hvf_set_dirty_tracking(section, 1); +} + +static void hvf_region_add(MemoryListener *listener, + MemoryRegionSection *section) +{ + hvf_set_phys_mem(section, true); +} + +static void hvf_region_del(MemoryListener *listener, + MemoryRegionSection *section) +{ + hvf_set_phys_mem(section, false); +} + +static MemoryListener hvf_memory_listener = { + .name = "hvf", + .priority = MEMORY_LISTENER_PRIORITY_ACCEL, + .region_add = hvf_region_add, + .region_del = hvf_region_del, + .log_start = hvf_log_start, + .log_stop = hvf_log_stop, + .log_sync = hvf_log_sync, +}; + +static int hvf_accel_init(AccelState *as, MachineState *ms) +{ + int x; + hv_return_t ret; + HVFState *s = HVF_STATE(as); + int pa_range = 36; + MachineClass *mc = MACHINE_GET_CLASS(ms); + + if (mc->hvf_get_physical_address_range) { + pa_range = mc->hvf_get_physical_address_range(ms); + if (pa_range < 0) { + return -EINVAL; + } + } + + ret = hvf_arch_vm_create(ms, (uint32_t)pa_range); + if (ret == HV_DENIED) { + error_report("Could not access HVF. 
Is the executable signed" + " with com.apple.security.hypervisor entitlement?"); + exit(1); + } + assert_hvf_ok(ret); + + s->num_slots = ARRAY_SIZE(s->slots); + for (x = 0; x < s->num_slots; ++x) { + s->slots[x].size = 0; + s->slots[x].slot_id = x; + } + + QTAILQ_INIT(&s->hvf_sw_breakpoints); + + hvf_state = s; + memory_listener_register(&hvf_memory_listener, &address_space_memory); + + return hvf_arch_init(); +} + +static int hvf_gdbstub_sstep_flags(AccelState *as) +{ + return SSTEP_ENABLE | SSTEP_NOIRQ; +} + +static void hvf_accel_class_init(ObjectClass *oc, const void *data) +{ + AccelClass *ac = ACCEL_CLASS(oc); + ac->name = "HVF"; + ac->init_machine = hvf_accel_init; + ac->allowed = &hvf_allowed; + ac->gdbstub_supported_sstep_flags = hvf_gdbstub_sstep_flags; +} + +static const TypeInfo hvf_accel_type = { + .name = TYPE_HVF_ACCEL, + .parent = TYPE_ACCEL, + .instance_size = sizeof(HVFState), + .class_init = hvf_accel_class_init, +}; + +static void hvf_type_init(void) +{ + type_register_static(&hvf_accel_type); +} + +type_init(hvf_type_init); diff --git a/accel/kvm/kvm-accel-ops.c b/accel/kvm/kvm-accel-ops.c index e5c1544..0eafc90 100644 --- a/accel/kvm/kvm-accel-ops.c +++ b/accel/kvm/kvm-accel-ops.c @@ -101,6 +101,7 @@ static void kvm_accel_ops_class_init(ObjectClass *oc, const void *data) ops->synchronize_post_init = kvm_cpu_synchronize_post_init; ops->synchronize_state = kvm_cpu_synchronize_state; ops->synchronize_pre_loadvm = kvm_cpu_synchronize_pre_loadvm; + ops->handle_interrupt = generic_handle_interrupt; #ifdef TARGET_KVM_HAVE_GUEST_DEBUG ops->update_guest_debug = kvm_update_guest_debug_ops; diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index d095d1b..a106d1b 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -453,7 +453,13 @@ static void kvm_reset_parked_vcpus(KVMState *s) } } -int kvm_create_vcpu(CPUState *cpu) +/** + * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU + * @cpu: QOM CPUState object for which KVM vCPU has 
to be fetched/created. + * + * @returns: 0 when success, errno (<0) when failed. + */ +static int kvm_create_vcpu(CPUState *cpu) { unsigned long vcpu_id = kvm_arch_vcpu_id(cpu); KVMState *s = kvm_state; @@ -515,16 +521,23 @@ static int do_kvm_destroy_vcpu(CPUState *cpu) goto err; } + /* If I am the CPU that created coalesced_mmio_ring, then discard it */ + if (s->coalesced_mmio_ring == (void *)cpu->kvm_run + PAGE_SIZE) { + s->coalesced_mmio_ring = NULL; + } + ret = munmap(cpu->kvm_run, mmap_size); if (ret < 0) { goto err; } + cpu->kvm_run = NULL; if (cpu->kvm_dirty_gfns) { ret = munmap(cpu->kvm_dirty_gfns, s->kvm_dirty_ring_bytes); if (ret < 0) { goto err; } + cpu->kvm_dirty_gfns = NULL; } kvm_park_vcpu(cpu); @@ -608,6 +621,31 @@ err: return ret; } +void kvm_close(void) +{ + CPUState *cpu; + + if (!kvm_state || kvm_state->fd == -1) { + return; + } + + CPU_FOREACH(cpu) { + cpu_remove_sync(cpu); + close(cpu->kvm_fd); + cpu->kvm_fd = -1; + close(cpu->kvm_vcpu_stats_fd); + cpu->kvm_vcpu_stats_fd = -1; + } + + if (kvm_state && kvm_state->fd != -1) { + close(kvm_state->vmfd); + kvm_state->vmfd = -1; + close(kvm_state->fd); + kvm_state->fd = -1; + } + kvm_state = NULL; +} + /* * dirty pages logging control */ @@ -2464,13 +2502,10 @@ uint32_t kvm_dirty_ring_size(void) return kvm_state->kvm_dirty_ring_size; } -static int do_kvm_create_vm(MachineState *ms, int type) +static int do_kvm_create_vm(KVMState *s, int type) { - KVMState *s; int ret; - s = KVM_STATE(ms->accelerator); - do { ret = kvm_ioctl(s, KVM_CREATE_VM, type); } while (ret == -EINTR); @@ -2567,7 +2602,7 @@ static int kvm_setup_dirty_ring(KVMState *s) return 0; } -static int kvm_init(MachineState *ms) +static int kvm_init(AccelState *as, MachineState *ms) { MachineClass *mc = MACHINE_GET_CLASS(ms); static const char upgrade_note[] = @@ -2582,15 +2617,13 @@ static int kvm_init(MachineState *ms) { /* end of list */ } }, *nc = num_cpus; int soft_vcpus_limit, hard_vcpus_limit; - KVMState *s; + KVMState *s = 
KVM_STATE(as); const KVMCapabilityInfo *missing_cap; int ret; int type; qemu_mutex_init(&kml_slots_lock); - s = KVM_STATE(ms->accelerator); - /* * On systems where the kernel can support different base page * sizes, host page size may be different from TARGET_PAGE_SIZE, @@ -2642,7 +2675,7 @@ static int kvm_init(MachineState *ms) goto err; } - ret = do_kvm_create_vm(ms, type); + ret = do_kvm_create_vm(s, type); if (ret < 0) { goto err; } @@ -3785,10 +3818,10 @@ int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target) return r; } -static bool kvm_accel_has_memory(MachineState *ms, AddressSpace *as, +static bool kvm_accel_has_memory(AccelState *accel, AddressSpace *as, hwaddr start_addr, hwaddr size) { - KVMState *kvm = KVM_STATE(ms->accelerator); + KVMState *kvm = KVM_STATE(accel); int i; for (i = 0; i < kvm->nr_as; ++i) { @@ -3979,7 +4012,7 @@ static void kvm_accel_instance_init(Object *obj) * Returns: SSTEP_* flags that KVM supports for guest debug. The * support is probed during kvm_init() */ -static int kvm_gdbstub_sstep_flags(void) +static int kvm_gdbstub_sstep_flags(AccelState *as) { return kvm_sstep_flags; } diff --git a/accel/qtest/qtest.c b/accel/qtest/qtest.c index 92bed92..2b83126 100644 --- a/accel/qtest/qtest.c +++ b/accel/qtest/qtest.c @@ -24,6 +24,7 @@ #include "qemu/guest-random.h" #include "qemu/main-loop.h" #include "hw/core/cpu.h" +#include "accel/dummy-cpus.h" static int64_t qtest_clock_counter; @@ -37,7 +38,7 @@ static void qtest_set_virtual_clock(int64_t count) qatomic_set_i64(&qtest_clock_counter, count); } -static int qtest_init_accel(MachineState *ms) +static int qtest_init_accel(AccelState *as, MachineState *ms) { return 0; } @@ -66,6 +67,7 @@ static void qtest_accel_ops_class_init(ObjectClass *oc, const void *data) ops->create_vcpu_thread = dummy_start_vcpu_thread; ops->get_virtual_clock = qtest_get_virtual_clock; ops->set_virtual_clock = qtest_set_virtual_clock; + ops->handle_interrupt = generic_handle_interrupt; }; static const 
TypeInfo qtest_accel_ops_type = { diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c index ecfd763..68cd33b 100644 --- a/accel/stubs/kvm-stub.c +++ b/accel/stubs/kvm-stub.c @@ -29,10 +29,6 @@ void kvm_flush_coalesced_mmio_buffer(void) { } -void kvm_cpu_synchronize_state(CPUState *cpu) -{ -} - bool kvm_has_sync_mmu(void) { return false; @@ -105,11 +101,6 @@ unsigned int kvm_get_free_memslots(void) return 0; } -void kvm_init_cpu_signals(CPUState *cpu) -{ - abort(); -} - bool kvm_arm_supports_user_irq(void) { return false; diff --git a/accel/stubs/meson.build b/accel/stubs/meson.build index 8ca1a45..9dfc4f9 100644 --- a/accel/stubs/meson.build +++ b/accel/stubs/meson.build @@ -3,5 +3,7 @@ system_stubs_ss.add(when: 'CONFIG_XEN', if_false: files('xen-stub.c')) system_stubs_ss.add(when: 'CONFIG_KVM', if_false: files('kvm-stub.c')) system_stubs_ss.add(when: 'CONFIG_TCG', if_false: files('tcg-stub.c')) system_stubs_ss.add(when: 'CONFIG_HVF', if_false: files('hvf-stub.c')) +system_stubs_ss.add(when: 'CONFIG_NVMM', if_false: files('nvmm-stub.c')) +system_stubs_ss.add(when: 'CONFIG_WHPX', if_false: files('whpx-stub.c')) specific_ss.add_all(when: ['CONFIG_SYSTEM_ONLY'], if_true: system_stubs_ss) diff --git a/accel/stubs/nvmm-stub.c b/accel/stubs/nvmm-stub.c new file mode 100644 index 0000000..ec14837 --- /dev/null +++ b/accel/stubs/nvmm-stub.c @@ -0,0 +1,12 @@ +/* + * NVMM stubs for QEMU + * + * Copyright (c) Linaro + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "system/nvmm.h" + +bool nvmm_allowed; diff --git a/accel/stubs/whpx-stub.c b/accel/stubs/whpx-stub.c new file mode 100644 index 0000000..c564c89 --- /dev/null +++ b/accel/stubs/whpx-stub.c @@ -0,0 +1,12 @@ +/* + * WHPX stubs for QEMU + * + * Copyright (c) Linaro + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "system/whpx.h" + +bool whpx_allowed; diff --git a/accel/tcg/internal-common.h b/accel/tcg/internal-common.h index 
1dbc45d..77a3a06 100644 --- a/accel/tcg/internal-common.h +++ b/accel/tcg/internal-common.h @@ -139,4 +139,6 @@ G_NORETURN void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr); void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr); void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr); +void tcg_dump_stats(GString *buf); + #endif diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c index 1c182b6..e7ed728 100644 --- a/accel/tcg/monitor.c +++ b/accel/tcg/monitor.c @@ -141,16 +141,26 @@ static void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide) *pelide = elide; } -static void tcg_dump_info(GString *buf) +static void tcg_dump_flush_info(GString *buf) { - g_string_append_printf(buf, "[TCG profiler not compiled]\n"); + size_t flush_full, flush_part, flush_elide; + + g_string_append_printf(buf, "TB flush count %u\n", + qatomic_read(&tb_ctx.tb_flush_count)); + g_string_append_printf(buf, "TB invalidate count %u\n", + qatomic_read(&tb_ctx.tb_phys_invalidate_count)); + + tlb_flush_counts(&flush_full, &flush_part, &flush_elide); + g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full); + g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part); + g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide); } static void dump_exec_info(GString *buf) { struct tb_tree_stats tst = {}; struct qht_stats hst; - size_t nb_tbs, flush_full, flush_part, flush_elide; + size_t nb_tbs; tcg_tb_foreach(tb_tree_stats_iter, &tst); nb_tbs = tst.nb_tbs; @@ -187,50 +197,26 @@ static void dump_exec_info(GString *buf) qht_statistics_destroy(&hst); g_string_append_printf(buf, "\nStatistics:\n"); - g_string_append_printf(buf, "TB flush count %u\n", - qatomic_read(&tb_ctx.tb_flush_count)); - g_string_append_printf(buf, "TB invalidate count %u\n", - qatomic_read(&tb_ctx.tb_phys_invalidate_count)); - - tlb_flush_counts(&flush_full, &flush_part, &flush_elide); - g_string_append_printf(buf, "TLB full flushes %zu\n", 
flush_full); - g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part); - g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide); - tcg_dump_info(buf); + tcg_dump_flush_info(buf); } -HumanReadableText *qmp_x_query_jit(Error **errp) +void tcg_dump_stats(GString *buf) { - g_autoptr(GString) buf = g_string_new(""); - - if (!tcg_enabled()) { - error_setg(errp, "JIT information is only available with accel=tcg"); - return NULL; - } - dump_accel_info(buf); dump_exec_info(buf); dump_drift_info(buf); - - return human_readable_text_from_str(buf); -} - -static void tcg_dump_op_count(GString *buf) -{ - g_string_append_printf(buf, "[TCG profiler not compiled]\n"); } -HumanReadableText *qmp_x_query_opcount(Error **errp) +HumanReadableText *qmp_x_query_jit(Error **errp) { g_autoptr(GString) buf = g_string_new(""); if (!tcg_enabled()) { - error_setg(errp, - "Opcode count information is only available with accel=tcg"); + error_setg(errp, "JIT information is only available with accel=tcg"); return NULL; } - tcg_dump_op_count(buf); + tcg_dump_stats(buf); return human_readable_text_from_str(buf); } @@ -238,7 +224,6 @@ HumanReadableText *qmp_x_query_opcount(Error **errp) static void hmp_tcg_register(void) { monitor_register_hmp_info_hrt("jit", qmp_x_query_jit); - monitor_register_hmp_info_hrt("opcount", qmp_x_query_opcount); } type_init(hmp_tcg_register); diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c index dfcee30..337b993 100644 --- a/accel/tcg/tcg-accel-ops-mttcg.c +++ b/accel/tcg/tcg-accel-ops-mttcg.c @@ -113,7 +113,6 @@ static void *mttcg_cpu_thread_fn(void *arg) } } - qatomic_set_mb(&cpu->exit_request, 0); qemu_wait_io_event(cpu); } while (!cpu->unplug || cpu_can_run(cpu)); diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c index 6eec5c9..a578698 100644 --- a/accel/tcg/tcg-accel-ops-rr.c +++ b/accel/tcg/tcg-accel-ops-rr.c @@ -302,6 +302,8 @@ static void *rr_cpu_thread_fn(void *arg) 
rr_deal_with_unplugged_cpus(); } + rcu_unregister_thread(); + g_assert_not_reached(); } diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c index b24d6a7..279dbfa 100644 --- a/accel/tcg/tcg-accel-ops.c +++ b/accel/tcg/tcg-accel-ops.c @@ -80,6 +80,9 @@ int tcg_cpu_exec(CPUState *cpu) cpu_exec_start(cpu); ret = cpu_exec(cpu); cpu_exec_end(cpu); + + qatomic_set_mb(&cpu->exit_request, 0); + return ret; } @@ -93,8 +96,6 @@ static void tcg_cpu_reset_hold(CPUState *cpu) /* mask must never be zero, except for A20 change call */ void tcg_handle_interrupt(CPUState *cpu, int mask) { - g_assert(bql_locked()); - cpu->interrupt_request |= mask; /* @@ -198,8 +199,10 @@ static inline void tcg_remove_all_breakpoints(CPUState *cpu) cpu_watchpoint_remove_all(cpu, BP_GDB); } -static void tcg_accel_ops_init(AccelOpsClass *ops) +static void tcg_accel_ops_init(AccelClass *ac) { + AccelOpsClass *ops = ac->ops; + if (qemu_tcg_mttcg_enabled()) { ops->create_vcpu_thread = mttcg_start_vcpu_thread; ops->kick_vcpu_thread = mttcg_kick_vcpu_thread; diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c index 6e5dc33..5904582 100644 --- a/accel/tcg/tcg-all.c +++ b/accel/tcg/tcg-all.c @@ -80,9 +80,9 @@ static void tcg_accel_instance_init(Object *obj) bool one_insn_per_tb; -static int tcg_init_machine(MachineState *ms) +static int tcg_init_machine(AccelState *as, MachineState *ms) { - TCGState *s = TCG_STATE(current_accel()); + TCGState *s = TCG_STATE(as); unsigned max_threads = 1; #ifndef CONFIG_USER_ONLY @@ -219,7 +219,7 @@ static void tcg_set_one_insn_per_tb(Object *obj, bool value, Error **errp) qatomic_set(&one_insn_per_tb, value); } -static int tcg_gdbstub_supported_sstep_flags(void) +static int tcg_gdbstub_supported_sstep_flags(AccelState *as) { /* * In replay mode all events will come from the log and can't be diff --git a/accel/xen/xen-all.c b/accel/xen/xen-all.c index de52a8f..bd0ff64 100644 --- a/accel/xen/xen-all.c +++ b/accel/xen/xen-all.c @@ -18,6 +18,7 @@ #include 
"hw/xen/xen_igd.h" #include "chardev/char.h" #include "qemu/accel.h" +#include "accel/dummy-cpus.h" #include "system/accel-ops.h" #include "system/cpus.h" #include "system/xen.h" @@ -63,7 +64,7 @@ static void xen_set_igd_gfx_passthru(Object *obj, bool value, Error **errp) xen_igd_gfx_pt_set(value, errp); } -static void xen_setup_post(MachineState *ms, AccelState *accel) +static void xen_setup_post(AccelState *as) { int rc; @@ -76,7 +77,7 @@ static void xen_setup_post(MachineState *ms, AccelState *accel) } } -static int xen_init(MachineState *ms) +static int xen_init(AccelState *as, MachineState *ms) { MachineClass *mc = MACHINE_GET_CLASS(ms); @@ -152,6 +153,7 @@ static void xen_accel_ops_class_init(ObjectClass *oc, const void *data) AccelOpsClass *ops = ACCEL_OPS_CLASS(oc); ops->create_vcpu_thread = dummy_start_vcpu_thread; + ops->handle_interrupt = generic_handle_interrupt; } static const TypeInfo xen_accel_ops_type = { diff --git a/backends/iommufd.c b/backends/iommufd.c index c2c47ab..2a33c7a 100644 --- a/backends/iommufd.c +++ b/backends/iommufd.c @@ -16,12 +16,18 @@ #include "qemu/module.h" #include "qom/object_interfaces.h" #include "qemu/error-report.h" +#include "migration/cpr.h" #include "monitor/monitor.h" #include "trace.h" #include "hw/vfio/vfio-device.h" #include <sys/ioctl.h> #include <linux/iommufd.h> +static const char *iommufd_fd_name(IOMMUFDBackend *be) +{ + return object_get_canonical_path_component(OBJECT(be)); +} + static void iommufd_backend_init(Object *obj) { IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); @@ -64,26 +70,73 @@ static bool iommufd_backend_can_be_deleted(UserCreatable *uc) return !be->users; } +static void iommufd_backend_complete(UserCreatable *uc, Error **errp) +{ + IOMMUFDBackend *be = IOMMUFD_BACKEND(uc); + const char *name = iommufd_fd_name(be); + + if (!be->owned) { + /* fd came from the command line. Fetch updated value from cpr state. 
*/ + if (cpr_is_incoming()) { + be->fd = cpr_find_fd(name, 0); + } else { + cpr_save_fd(name, 0, be->fd); + } + } +} + static void iommufd_backend_class_init(ObjectClass *oc, const void *data) { UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); ucc->can_be_deleted = iommufd_backend_can_be_deleted; + ucc->complete = iommufd_backend_complete; object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd); } +bool iommufd_change_process_capable(IOMMUFDBackend *be) +{ + struct iommu_ioas_change_process args = {.size = sizeof(args)}; + + /* + * Call IOMMU_IOAS_CHANGE_PROCESS to verify it is a recognized ioctl. + * This is a no-op if the process has not changed since DMA was mapped. + */ + return !ioctl(be->fd, IOMMU_IOAS_CHANGE_PROCESS, &args); +} + +bool iommufd_change_process(IOMMUFDBackend *be, Error **errp) +{ + struct iommu_ioas_change_process args = {.size = sizeof(args)}; + bool ret = !ioctl(be->fd, IOMMU_IOAS_CHANGE_PROCESS, &args); + + if (!ret) { + error_setg_errno(errp, errno, "IOMMU_IOAS_CHANGE_PROCESS fd %d failed", + be->fd); + } + trace_iommufd_change_process(be->fd, ret); + return ret; +} + bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) { int fd; if (be->owned && !be->users) { - fd = qemu_open("/dev/iommu", O_RDWR, errp); + fd = cpr_open_fd("/dev/iommu", O_RDWR, iommufd_fd_name(be), 0, errp); if (fd < 0) { return false; } be->fd = fd; } + if (!be->users && !vfio_iommufd_cpr_register_iommufd(be, errp)) { + if (be->owned) { + close(be->fd); + be->fd = -1; + } + return false; + } be->users++; trace_iommufd_backend_connect(be->fd, be->owned, be->users); @@ -96,9 +149,13 @@ void iommufd_backend_disconnect(IOMMUFDBackend *be) goto out; } be->users--; - if (!be->users && be->owned) { - close(be->fd); - be->fd = -1; + if (!be->users) { + vfio_iommufd_cpr_unregister_iommufd(be); + if (be->owned) { + cpr_delete_fd(iommufd_fd_name(be), 0); + close(be->fd); + be->fd = -1; + } } out: trace_iommufd_backend_disconnect(be->fd, be->users); @@ 
-172,6 +229,44 @@ int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, return ret; } +int iommufd_backend_map_file_dma(IOMMUFDBackend *be, uint32_t ioas_id, + hwaddr iova, ram_addr_t size, + int mfd, unsigned long start, bool readonly) +{ + int ret, fd = be->fd; + struct iommu_ioas_map_file map = { + .size = sizeof(map), + .flags = IOMMU_IOAS_MAP_READABLE | + IOMMU_IOAS_MAP_FIXED_IOVA, + .ioas_id = ioas_id, + .fd = mfd, + .start = start, + .iova = iova, + .length = size, + }; + + if (cpr_is_incoming()) { + return 0; + } + + if (!readonly) { + map.flags |= IOMMU_IOAS_MAP_WRITEABLE; + } + + ret = ioctl(fd, IOMMU_IOAS_MAP_FILE, &map); + trace_iommufd_backend_map_file_dma(fd, ioas_id, iova, size, mfd, start, + readonly, ret); + if (ret) { + ret = -errno; + + /* TODO: Not support mapping hardware PCI BAR region for now. */ + if (errno == EFAULT) { + warn_report("IOMMU_IOAS_MAP_FILE failed: %m, PCI BAR?"); + } + } + return ret; +} + int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, ram_addr_t size) { @@ -183,6 +278,10 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, .length = size, }; + if (cpr_is_incoming()) { + return 0; + } + ret = ioctl(fd, IOMMU_IOAS_UNMAP, &unmap); /* * IOMMUFD takes mapping as some kind of object, unmapping diff --git a/backends/trace-events b/backends/trace-events index 7278214..56132d3 100644 --- a/backends/trace-events +++ b/backends/trace-events @@ -7,10 +7,12 @@ dbus_vmstate_loading(const char *id) "id: %s" dbus_vmstate_saving(const char *id) "id: %s" # iommufd.c +iommufd_change_process(int fd, bool ret) "fd=%d (%d)" iommufd_backend_connect(int fd, bool owned, uint32_t users) "fd=%d owned=%d users=%d" iommufd_backend_disconnect(int fd, uint32_t users) "fd=%d users=%d" iommu_backend_set_fd(int fd) "pre-opened /dev/iommu fd=%d" iommufd_backend_map_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, void *vaddr, bool readonly, int ret) " iommufd=%d ioas=%d 
iova=0x%"PRIx64" size=0x%"PRIx64" addr=%p readonly=%d (%d)" +iommufd_backend_map_file_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int fd, unsigned long start, bool readonly, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" fd=%d start=%ld readonly=%d (%d)" iommufd_backend_unmap_dma_non_exist(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " Unmap nonexistent mapping: iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)" iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)" iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas) " iommufd=%d ioas=%d" diff --git a/bsd-user/main.c b/bsd-user/main.c index 7c0a059..d0cc8e0 100644 --- a/bsd-user/main.c +++ b/bsd-user/main.c @@ -474,7 +474,7 @@ int main(int argc, char **argv) opt_one_insn_per_tb, &error_abort); object_property_set_int(OBJECT(accel), "tb-size", opt_tb_size, &error_abort); - ac->init_machine(NULL); + ac->init_machine(accel, NULL); } /* diff --git a/configs/devices/riscv64-softmmu/default.mak b/configs/devices/riscv64-softmmu/default.mak index 39ed3a0..e485bbd 100644 --- a/configs/devices/riscv64-softmmu/default.mak +++ b/configs/devices/riscv64-softmmu/default.mak @@ -11,3 +11,4 @@ # CONFIG_RISCV_VIRT=n # CONFIG_MICROCHIP_PFSOC=n # CONFIG_SHAKTI_C=n +# CONFIG_XIANGSHAN_KUNMINGHU=n diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst index b24c278..d50645a 100644 --- a/docs/about/deprecated.rst +++ b/docs/about/deprecated.rst @@ -315,6 +315,14 @@ deprecated; use the new name ``dtb-randomness`` instead. The new name better reflects the way this property affects all random data within the device tree blob, not just the ``kaslr-seed`` node. 
+Arm ``ast2700a0-evb`` machine (since 10.1) +'''''''''''''''''''''''''''''''''''''''''' + +The ``ast2700a0-evb`` machine represents the first revision of the AST2700 +and serves as the initial engineering sample rather than a production version. +A newer revision, A1, is now supported, and the ``ast2700a1-evb`` should +replace the older A0 version. + Mips ``mipssim`` machine (since 10.0) ''''''''''''''''''''''''''''''''''''' diff --git a/docs/devel/migration/CPR.rst b/docs/devel/migration/CPR.rst index 7897873..0a0fd4f 100644 --- a/docs/devel/migration/CPR.rst +++ b/docs/devel/migration/CPR.rst @@ -152,8 +152,7 @@ cpr-transfer mode This mode allows the user to transfer a guest to a new QEMU instance on the same host with minimal guest pause time, by preserving guest RAM in place, albeit with new virtual addresses in new QEMU. Devices -and their pinned memory pages will also be preserved in a future QEMU -release. +and their pinned memory pages are also preserved for VFIO and IOMMUFD. The user starts new QEMU on the same host as old QEMU, with command- line arguments to create the same machine, plus the ``-incoming`` @@ -322,6 +321,6 @@ Futures cpr-transfer mode is based on a capability to transfer open file descriptors from old to new QEMU. In the future, descriptors for -vfio, iommufd, vhost, and char devices could be transferred, +vhost, and char devices could be transferred, preserving those devices and their kernel state without interruption, even if they do not explicitly support live migration. 
diff --git a/docs/system/arm/aspeed.rst b/docs/system/arm/aspeed.rst index 43d27d8..bec0a1d 100644 --- a/docs/system/arm/aspeed.rst +++ b/docs/system/arm/aspeed.rst @@ -1,5 +1,4 @@ -Aspeed family boards (``ast2500-evb``, ``ast2600-evb``, ``ast2700-evb``, ``bletchley-bmc``, ``fuji-bmc``, ``fby35-bmc``, ``fp5280g2-bmc``, ``g220a-bmc``, ``palmetto-bmc``, ``qcom-dc-scm-v1-bmc``, ``qcom-firework-bmc``, ``quanta-q71l-bmc``, ``rainier-bmc``, ``romulus-bmc``, ``sonorapass-bmc``, ``supermicrox11-bmc``, ``supermicrox11spi-bmc``, ``tiogapass-bmc``, ``witherspoon-bmc``, ``yosemitev2-bmc``) -================================================================================================================================================================================================================================================================================================================================================================================================================================= +Aspeed family boards (``ast2500-evb``, ``ast2600-evb``, ``ast2700-evb``, ``bletchley-bmc``, ``fuji-bmc``, ``gb200nvl-bmc``, ``fby35-bmc``, ``fp5280g2-bmc``, ``g220a-bmc``, ``palmetto-bmc``, ``qcom-dc-scm-v1-bmc``, ``qcom-firework-bmc``, ``quanta-q71l-bmc``, ``rainier-bmc``, ``romulus-bmc``, ``sonorapass-bmc``, ``supermicrox11-bmc``, ``supermicrox11spi-bmc``, ``tiogapass-bmc``, ``witherspoon-bmc``, ``yosemitev2-bmc``) The QEMU Aspeed machines model BMCs of various OpenPOWER systems and Aspeed evaluation boards. 
They are based on different releases of the @@ -35,6 +34,7 @@ AST2600 SoC based machines : - ``fuji-bmc`` Facebook Fuji BMC - ``bletchley-bmc`` Facebook Bletchley BMC - ``fby35-bmc`` Facebook fby35 BMC +- ``gb200nvl-bmc`` Nvidia GB200nvl BMC - ``qcom-dc-scm-v1-bmc`` Qualcomm DC-SCM V1 BMC - ``qcom-firework-bmc`` Qualcomm Firework BMC diff --git a/docs/system/riscv/xiangshan-kunminghu.rst b/docs/system/riscv/xiangshan-kunminghu.rst new file mode 100644 index 0000000..46e7cee --- /dev/null +++ b/docs/system/riscv/xiangshan-kunminghu.rst @@ -0,0 +1,39 @@ +BOSC Xiangshan Kunminghu FPGA prototype platform (``xiangshan-kunminghu``) +========================================================================== +The ``xiangshan-kunminghu`` machine is compatible with our FPGA prototype +platform. + +XiangShan is an open-source high-performance RISC-V processor project. +The third generation processor is called Kunminghu. Kunminghu is a 64-bit +RV64GCBSUHV processor core. More information can be found in our Github +repository: +https://github.com/OpenXiangShan/XiangShan + +Supported devices +----------------- +The ``xiangshan-kunminghu`` machine supports the following devices: + +* Up to 16 xiangshan-kunminghu cores +* Core Local Interruptor (CLINT) +* Incoming MSI Controller (IMSIC) +* Advanced Platform-Level Interrupt Controller (APLIC) +* 1 UART + +Boot options +------------ +The ``xiangshan-kunminghu`` machine can start using the standard ``-bios`` +functionality for loading the boot image. You need to compile and link +the firmware, kernel, and Device Tree (FDT) into a single binary file, +such as ``fw_payload.bin``. + +Running +------- +Below is an example command line for running the ``xiangshan-kunminghu`` +machine: + +.. 
code-block:: bash + + $ qemu-system-riscv64 -machine xiangshan-kunminghu \ + -smp 16 -m 16G \ + -bios path/to/opensbi/platform/generic/firmware/fw_payload.bin \ + -nographic diff --git a/docs/system/target-riscv.rst b/docs/system/target-riscv.rst index 95457af..89b2cb7 100644 --- a/docs/system/target-riscv.rst +++ b/docs/system/target-riscv.rst @@ -71,6 +71,7 @@ undocumented; you can get a complete list by running riscv/shakti-c riscv/sifive_u riscv/virt + riscv/xiangshan-kunminghu RISC-V CPU firmware ------------------- diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx index 639a450..d797922 100644 --- a/hmp-commands-info.hx +++ b/hmp-commands-info.hx @@ -256,20 +256,6 @@ SRST Show dynamic compiler info. ERST -#if defined(CONFIG_TCG) - { - .name = "opcount", - .args_type = "", - .params = "", - .help = "show dynamic compiler opcode counters", - }, -#endif - -SRST - ``info opcount`` - Show dynamic compiler opcode counters -ERST - { .name = "sync-profile", .args_type = "mean:-m,no_coalesce:-n,max:i?", diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig index f543d94..6ea8653 100644 --- a/hw/arm/Kconfig +++ b/hw/arm/Kconfig @@ -532,6 +532,7 @@ config ASPEED_SOC select I2C select DPS310 select PCA9552 + select PCA9554 select SERIAL_MM select SMBUS_EEPROM select PCA954X diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c index d0b3336..c31bbe7 100644 --- a/hw/arm/aspeed.c +++ b/hw/arm/aspeed.c @@ -19,6 +19,7 @@ #include "hw/i2c/i2c_mux_pca954x.h" #include "hw/i2c/smbus_eeprom.h" #include "hw/gpio/pca9552.h" +#include "hw/gpio/pca9554.h" #include "hw/nvram/eeprom_at24c.h" #include "hw/sensor/tmp105.h" #include "hw/misc/led.h" @@ -197,9 +198,12 @@ struct AspeedMachineState { #define FUJI_BMC_HW_STRAP2 0x00000000 /* Bletchley hardware value */ -/* TODO: Leave same as EVB for now. 
*/ -#define BLETCHLEY_BMC_HW_STRAP1 AST2600_EVB_HW_STRAP1 -#define BLETCHLEY_BMC_HW_STRAP2 AST2600_EVB_HW_STRAP2 +#define BLETCHLEY_BMC_HW_STRAP1 0x00002000 +#define BLETCHLEY_BMC_HW_STRAP2 0x00000801 + +/* GB200NVL hardware value */ +#define GB200NVL_BMC_HW_STRAP1 AST2600_EVB_HW_STRAP1 +#define GB200NVL_BMC_HW_STRAP2 AST2600_EVB_HW_STRAP2 /* Qualcomm DC-SCM hardware value */ #define QCOM_DC_SCM_V1_BMC_HW_STRAP1 0x00000000 @@ -465,6 +469,8 @@ static void aspeed_machine_init(MachineState *machine) aspeed_board_init_flashes(&bmc->soc->spi[0], bmc->spi_model ? bmc->spi_model : amc->spi_model, 1, amc->num_cs); + aspeed_board_init_flashes(&bmc->soc->spi[1], + amc->spi2_model, 1, amc->num_cs2); } if (machine->kernel_filename && sc->num_cpus > 1) { @@ -645,6 +651,12 @@ static void create_pca9552(AspeedSoCState *soc, int bus_id, int addr) TYPE_PCA9552, addr); } +static I2CSlave *create_pca9554(AspeedSoCState *soc, int bus_id, int addr) +{ + return i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, bus_id), + TYPE_PCA9554, addr); +} + static void sonorapass_bmc_i2c_init(AspeedMachineState *bmc) { AspeedSoCState *soc = bmc->soc; @@ -1003,6 +1015,180 @@ static void fuji_bmc_i2c_init(AspeedMachineState *bmc) } #define TYPE_TMP421 "tmp421" +#define TYPE_DS1338 "ds1338" + +/* Catalina hardware value */ +#define CATALINA_BMC_HW_STRAP1 0x00002002 +#define CATALINA_BMC_HW_STRAP2 0x00000800 + +#define CATALINA_BMC_RAM_SIZE ASPEED_RAM_SIZE(2 * GiB) + +static void catalina_bmc_i2c_init(AspeedMachineState *bmc) +{ + /* Reference from v6.16-rc2 aspeed-bmc-facebook-catalina.dts */ + + AspeedSoCState *soc = bmc->soc; + I2CBus *i2c[16] = {}; + I2CSlave *i2c_mux; + + /* busses 0-15 are all used. 
*/ + for (int i = 0; i < ARRAY_SIZE(i2c); i++) { + i2c[i] = aspeed_i2c_get_bus(&soc->i2c, i); + } + + /* &i2c0 */ + /* i2c-mux@71 (PCA9546) on i2c0 */ + i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x71); + + /* i2c-mux@72 (PCA9546) on i2c0 */ + i2c_mux = i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x72); + + /* i2c0mux1ch1 */ + /* io_expander7 - pca9535@20 */ + i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 1), + TYPE_PCA9552, 0x20); + /* eeprom@50 */ + at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 1), 0x50, 8 * KiB); + + /* i2c-mux@73 (PCA9546) on i2c0 */ + i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x73); + + /* i2c-mux@75 (PCA9546) on i2c0 */ + i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x75); + + /* i2c-mux@76 (PCA9546) on i2c0 */ + i2c_mux = i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x76); + + /* i2c0mux4ch1 */ + /* io_expander8 - pca9535@21 */ + i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 1), + TYPE_PCA9552, 0x21); + /* eeprom@50 */ + at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 1), 0x50, 8 * KiB); + + /* i2c-mux@77 (PCA9546) on i2c0 */ + i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x77); + + + /* &i2c1 */ + /* i2c-mux@70 (PCA9548) on i2c1 */ + i2c_mux = i2c_slave_create_simple(i2c[1], TYPE_PCA9548, 0x70); + /* i2c1mux0ch0 */ + /* ina238@41 - no model */ + /* ina238@42 - no model */ + /* ina238@44 - no model */ + /* i2c1mux0ch1 */ + /* ina238@41 - no model */ + /* ina238@43 - no model */ + /* i2c1mux0ch4 */ + /* ltc4287@42 - no model */ + /* ltc4287@43 - no model */ + + /* i2c1mux0ch5 */ + /* eeprom@54 */ + at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 5), 0x54, 8 * KiB); + /* tpm75@4f */ + i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 5), TYPE_TMP75, 0x4f); + + /* i2c1mux0ch6 */ + /* io_expander5 - pca9554@27 */ + i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 6), + TYPE_PCA9554, 0x27); + /* io_expander6 - pca9555@25 */ + i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 6), + TYPE_PCA9552, 0x25); + /* 
eeprom@51 */ + at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 6), 0x51, 8 * KiB); + + /* i2c1mux0ch7 */ + /* eeprom@53 */ + at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 7), 0x53, 8 * KiB); + /* temperature-sensor@4b - tmp75 */ + i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 7), TYPE_TMP75, 0x4b); + + /* &i2c2 */ + /* io_expander0 - pca9555@20 */ + i2c_slave_create_simple(i2c[2], TYPE_PCA9552, 0x20); + /* io_expander0 - pca9555@21 */ + i2c_slave_create_simple(i2c[2], TYPE_PCA9552, 0x21); + /* io_expander0 - pca9555@27 */ + i2c_slave_create_simple(i2c[2], TYPE_PCA9552, 0x27); + /* eeprom@50 */ + at24c_eeprom_init(i2c[2], 0x50, 8 * KiB); + /* eeprom@51 */ + at24c_eeprom_init(i2c[2], 0x51, 8 * KiB); + + /* &i2c5 */ + /* i2c-mux@70 (PCA9548) on i2c5 */ + i2c_mux = i2c_slave_create_simple(i2c[5], TYPE_PCA9548, 0x70); + /* i2c5mux0ch6 */ + /* eeprom@52 */ + at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 6), 0x52, 8 * KiB); + /* i2c5mux0ch7 */ + /* ina230@40 - no model */ + /* ina230@41 - no model */ + /* ina230@44 - no model */ + /* ina230@45 - no model */ + + /* &i2c6 */ + /* io_expander3 - pca9555@21 */ + i2c_slave_create_simple(i2c[6], TYPE_PCA9552, 0x21); + /* rtc@6f - nct3018y */ + i2c_slave_create_simple(i2c[6], TYPE_DS1338, 0x6f); + + /* &i2c9 */ + /* io_expander4 - pca9555@4f */ + i2c_slave_create_simple(i2c[9], TYPE_PCA9552, 0x4f); + /* temperature-sensor@4b - tmp75 */ + i2c_slave_create_simple(i2c[9], TYPE_TMP75, 0x4b); + /* eeprom@50 */ + at24c_eeprom_init(i2c[9], 0x50, 8 * KiB); + /* eeprom@56 */ + at24c_eeprom_init(i2c[9], 0x56, 8 * KiB); + + /* &i2c10 */ + /* temperature-sensor@1f - tmp421 */ + i2c_slave_create_simple(i2c[10], TYPE_TMP421, 0x1f); + /* eeprom@50 */ + at24c_eeprom_init(i2c[10], 0x50, 8 * KiB); + + /* &i2c11 */ + /* ssif-bmc@10 - no model */ + + /* &i2c12 */ + /* eeprom@50 */ + at24c_eeprom_init(i2c[12], 0x50, 8 * KiB); + + /* &i2c13 */ + /* eeprom@50 */ + at24c_eeprom_init(i2c[13], 0x50, 8 * KiB); + /* eeprom@54 */ +
at24c_eeprom_init(i2c[13], 0x54, 256); + /* eeprom@55 */ + at24c_eeprom_init(i2c[13], 0x55, 256); + /* eeprom@57 */ + at24c_eeprom_init(i2c[13], 0x57, 256); + + /* &i2c14 */ + /* io_expander9 - pca9555@10 */ + i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x10); + /* io_expander10 - pca9555@11 */ + i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x11); + /* io_expander11 - pca9555@12 */ + i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x12); + /* io_expander12 - pca9555@13 */ + i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x13); + /* io_expander13 - pca9555@14 */ + i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x14); + /* io_expander14 - pca9555@15 */ + i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x15); + + /* &i2c15 */ + /* temperature-sensor@1f - tmp421 */ + i2c_slave_create_simple(i2c[15], TYPE_TMP421, 0x1f); + /* eeprom@52 */ + at24c_eeprom_init(i2c[15], 0x52, 8 * KiB); +} static void bletchley_bmc_i2c_init(AspeedMachineState *bmc) { @@ -1050,6 +1236,45 @@ static void bletchley_bmc_i2c_init(AspeedMachineState *bmc) i2c_slave_create_simple(i2c[12], TYPE_PCA9552, 0x67); } + +static void gb200nvl_bmc_i2c_init(AspeedMachineState *bmc) +{ + AspeedSoCState *soc = bmc->soc; + I2CBus *i2c[15] = {}; + DeviceState *dev; + for (int i = 0; i < sizeof(i2c) / sizeof(i2c[0]); i++) { + if ((i == 11) || (i == 12) || (i == 13)) { + continue; + } + i2c[i] = aspeed_i2c_get_bus(&soc->i2c, i); + } + + /* Bus 5 Expander */ + create_pca9554(soc, 4, 0x21); + + /* Mux I2c Expanders */ + i2c_slave_create_simple(i2c[5], "pca9546", 0x71); + i2c_slave_create_simple(i2c[5], "pca9546", 0x72); + i2c_slave_create_simple(i2c[5], "pca9546", 0x73); + i2c_slave_create_simple(i2c[5], "pca9546", 0x75); + i2c_slave_create_simple(i2c[5], "pca9546", 0x76); + i2c_slave_create_simple(i2c[5], "pca9546", 0x77); + + /* Bus 10 */ + dev = DEVICE(create_pca9554(soc, 9, 0x20)); + + /* Set FPGA_READY */ + object_property_set_str(OBJECT(dev), "pin1", "high", &error_fatal); + + create_pca9554(soc, 9, 0x21); + 
at24c_eeprom_init(i2c[9], 0x50, 64 * KiB); + at24c_eeprom_init(i2c[9], 0x51, 64 * KiB); + + /* Bus 11 */ + at24c_eeprom_init_rom(i2c[10], 0x50, 256, gb200nvl_bmc_fruid, + gb200nvl_bmc_fruid_len); +} + static void fby35_i2c_init(AspeedMachineState *bmc) { AspeedSoCState *soc = bmc->soc; @@ -1585,6 +1810,52 @@ static void aspeed_machine_bletchley_class_init(ObjectClass *oc, aspeed_machine_class_init_cpus_defaults(mc); } +static void aspeed_machine_catalina_class_init(ObjectClass *oc, + const void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc); + + mc->desc = "Facebook Catalina BMC (Cortex-A7)"; + amc->soc_name = "ast2600-a3"; + amc->hw_strap1 = CATALINA_BMC_HW_STRAP1; + amc->hw_strap2 = CATALINA_BMC_HW_STRAP2; + amc->fmc_model = "w25q01jvq"; + amc->spi_model = NULL; + amc->num_cs = 2; + amc->macs_mask = ASPEED_MAC2_ON; + amc->i2c_init = catalina_bmc_i2c_init; + mc->auto_create_sdcard = true; + mc->default_ram_size = CATALINA_BMC_RAM_SIZE; + aspeed_machine_class_init_cpus_defaults(mc); + aspeed_machine_ast2600_class_emmc_init(oc); +} + +#define GB200NVL_BMC_RAM_SIZE ASPEED_RAM_SIZE(1 * GiB) + +static void aspeed_machine_gb200nvl_class_init(ObjectClass *oc, + const void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc); + + mc->desc = "Nvidia GB200NVL BMC (Cortex-A7)"; + amc->soc_name = "ast2600-a3"; + amc->hw_strap1 = GB200NVL_BMC_HW_STRAP1; + amc->hw_strap2 = GB200NVL_BMC_HW_STRAP2; + amc->fmc_model = "mx66u51235f"; + amc->spi_model = "mx66u51235f"; + amc->num_cs = 2; + + amc->spi2_model = "mx66u51235f"; + amc->num_cs2 = 1; + amc->macs_mask = ASPEED_MAC0_ON | ASPEED_MAC1_ON; + amc->i2c_init = gb200nvl_bmc_i2c_init; + mc->default_ram_size = GB200NVL_BMC_RAM_SIZE; + aspeed_machine_class_init_cpus_defaults(mc); + aspeed_machine_ast2600_class_emmc_init(oc); +} + static void fby35_reset(MachineState *state, ResetType type) { AspeedMachineState *bmc = 
ASPEED_MACHINE(state); @@ -1878,6 +2149,14 @@ static const TypeInfo aspeed_machine_types[] = { .parent = TYPE_ASPEED_MACHINE, .class_init = aspeed_machine_bletchley_class_init, }, { + .name = MACHINE_TYPE_NAME("gb200nvl-bmc"), + .parent = TYPE_ASPEED_MACHINE, + .class_init = aspeed_machine_gb200nvl_class_init, + }, { + .name = MACHINE_TYPE_NAME("catalina-bmc"), + .parent = TYPE_ASPEED_MACHINE, + .class_init = aspeed_machine_catalina_class_init, + }, { .name = MACHINE_TYPE_NAME("fby35-bmc"), .parent = MACHINE_TYPE_NAME("ast2600-evb"), .class_init = aspeed_machine_fby35_class_init, diff --git a/hw/arm/aspeed_eeprom.c b/hw/arm/aspeed_eeprom.c index daa3d32..8bbbdec 100644 --- a/hw/arm/aspeed_eeprom.c +++ b/hw/arm/aspeed_eeprom.c @@ -162,6 +162,25 @@ const uint8_t rainier_bmc_fruid[] = { 0x31, 0x50, 0x46, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, }; +const uint8_t gb200nvl_bmc_fruid[] = { + 0x01, 0x00, 0x00, 0x01, 0x0b, 0x00, 0x00, 0xf3, 0x01, 0x0a, 0x19, 0x1f, + 0x0f, 0xe6, 0xc6, 0x4e, 0x56, 0x49, 0x44, 0x49, 0x41, 0xc5, 0x50, 0x33, + 0x38, 0x30, 0x39, 0xcd, 0x31, 0x35, 0x38, 0x33, 0x33, 0x32, 0x34, 0x38, + 0x30, 0x30, 0x31, 0x35, 0x30, 0xd2, 0x36, 0x39, 0x39, 0x2d, 0x31, 0x33, + 0x38, 0x30, 0x39, 0x2d, 0x30, 0x34, 0x30, 0x34, 0x2d, 0x36, 0x30, 0x30, + 0xc0, 0x01, 0x01, 0xd6, 0x4d, 0x41, 0x43, 0x3a, 0x20, 0x33, 0x43, 0x3a, + 0x36, 0x44, 0x3a, 0x36, 0x36, 0x3a, 0x31, 0x34, 0x3a, 0x43, 0x38, 0x3a, + 0x37, 0x41, 0xc1, 0x3b, 0x01, 0x09, 0x19, 0xc6, 0x4e, 0x56, 0x49, 0x44, + 0x49, 0x41, 0xc9, 0x50, 0x33, 0x38, 0x30, 0x39, 0x2d, 0x42, 0x4d, 0x43, + 0xd2, 0x36, 0x39, 0x39, 0x2d, 0x31, 0x33, 0x38, 0x30, 0x39, 0x2d, 0x30, + 0x34, 0x30, 0x34, 0x2d, 0x36, 0x30, 0x30, 0xc4, 0x41, 0x45, 0x2e, 0x31, + 0xcd, 0x31, 0x35, 0x38, 0x33, 0x33, 0x32, 0x34, 0x38, 0x30, 0x30, 0x31, + 0x35, 0x30, 0xc0, 0xc4, 0x76, 0x30, 0x2e, 0x31, 0xc1, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xb4, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + +}; + const size_t 
tiogapass_bmc_fruid_len = sizeof(tiogapass_bmc_fruid); const size_t fby35_nic_fruid_len = sizeof(fby35_nic_fruid); const size_t fby35_bb_fruid_len = sizeof(fby35_bb_fruid); @@ -169,3 +188,5 @@ const size_t fby35_bmc_fruid_len = sizeof(fby35_bmc_fruid); const size_t yosemitev2_bmc_fruid_len = sizeof(yosemitev2_bmc_fruid); const size_t rainier_bb_fruid_len = sizeof(rainier_bb_fruid); const size_t rainier_bmc_fruid_len = sizeof(rainier_bmc_fruid); +const size_t gb200nvl_bmc_fruid_len = sizeof(gb200nvl_bmc_fruid); + diff --git a/hw/arm/aspeed_eeprom.h b/hw/arm/aspeed_eeprom.h index f08c16e..3ed9bc1 100644 --- a/hw/arm/aspeed_eeprom.h +++ b/hw/arm/aspeed_eeprom.h @@ -26,4 +26,7 @@ extern const size_t rainier_bb_fruid_len; extern const uint8_t rainier_bmc_fruid[]; extern const size_t rainier_bmc_fruid_len; +extern const uint8_t gb200nvl_bmc_fruid[]; +extern const size_t gb200nvl_bmc_fruid_len; + #endif diff --git a/hw/char/sifive_uart.c b/hw/char/sifive_uart.c index 0fc89e7..9bc697a 100644 --- a/hw/char/sifive_uart.c +++ b/hw/char/sifive_uart.c @@ -128,8 +128,10 @@ static void sifive_uart_write_tx_fifo(SiFiveUARTState *s, const uint8_t *buf, s->txfifo |= SIFIVE_UART_TXFIFO_FULL; } - timer_mod(s->fifo_trigger_handle, current_time + - TX_INTERRUPT_TRIGGER_DELAY_NS); + if (!timer_pending(s->fifo_trigger_handle)) { + timer_mod(s->fifo_trigger_handle, current_time + + TX_INTERRUPT_TRIGGER_DELAY_NS); + } } static uint64_t diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig index eb65bda..14d23e2 100644 --- a/hw/i386/Kconfig +++ b/hw/i386/Kconfig @@ -13,7 +13,7 @@ config SGX config TDX bool select X86_FW_OVMF - depends on KVM + depends on KVM && X86_64 config PC bool diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c index b0139f0..4623cfa0 100644 --- a/hw/intc/riscv_aclint.c +++ b/hw/intc/riscv_aclint.c @@ -28,6 +28,7 @@ #include "qemu/module.h" #include "hw/sysbus.h" #include "target/riscv/cpu.h" +#include "target/riscv/time_helper.h" #include "hw/qdev-properties.h" 
#include "hw/intc/riscv_aclint.h" #include "qemu/timer.h" @@ -240,6 +241,10 @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr, riscv_aclint_mtimer_write_timecmp(mtimer, RISCV_CPU(cpu), mtimer->hartid_base + i, mtimer->timecmp[i]); + riscv_timer_write_timecmp(env, env->stimer, env->stimecmp, 0, MIP_STIP); + riscv_timer_write_timecmp(env, env->vstimer, env->vstimecmp, + env->htimedelta, MIP_VSTIP); + } return; } diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c index 8bcd9f4..4fa5f75 100644 --- a/hw/intc/riscv_aplic.c +++ b/hw/intc/riscv_aplic.c @@ -962,10 +962,18 @@ static const Property riscv_aplic_properties[] = { DEFINE_PROP_BOOL("mmode", RISCVAPLICState, mmode, 0), }; +static bool riscv_aplic_state_needed(void *opaque) +{ + RISCVAPLICState *aplic = opaque; + + return riscv_use_emulated_aplic(aplic->msimode); +} + static const VMStateDescription vmstate_riscv_aplic = { .name = "riscv_aplic", - .version_id = 2, - .minimum_version_id = 2, + .version_id = 3, + .minimum_version_id = 3, + .needed = riscv_aplic_state_needed, .fields = (const VMStateField[]) { VMSTATE_UINT32(domaincfg, RISCVAPLICState), VMSTATE_UINT32(mmsicfgaddr, RISCVAPLICState), diff --git a/hw/intc/riscv_imsic.c b/hw/intc/riscv_imsic.c index 2169988..6174e1a 100644 --- a/hw/intc/riscv_imsic.c +++ b/hw/intc/riscv_imsic.c @@ -398,10 +398,16 @@ static const Property riscv_imsic_properties[] = { DEFINE_PROP_UINT32("num-irqs", RISCVIMSICState, num_irqs, 0), }; +static bool riscv_imsic_state_needed(void *opaque) +{ + return !kvm_irqchip_in_kernel(); +} + static const VMStateDescription vmstate_riscv_imsic = { .name = "riscv_imsic", - .version_id = 1, - .minimum_version_id = 1, + .version_id = 2, + .minimum_version_id = 2, + .needed = riscv_imsic_state_needed, .fields = (const VMStateField[]) { VMSTATE_VARRAY_UINT32(eidelivery, RISCVIMSICState, num_pages, 0, diff --git a/hw/misc/aspeed_scu.c b/hw/misc/aspeed_scu.c index 4930e00..a0ab5ee 100644 --- a/hw/misc/aspeed_scu.c +++ 
b/hw/misc/aspeed_scu.c @@ -91,6 +91,7 @@ #define BMC_DEV_ID TO_REG(0x1A4) #define AST2600_PROT_KEY TO_REG(0x00) +#define AST2600_PROT_KEY2 TO_REG(0x10) #define AST2600_SILICON_REV TO_REG(0x04) #define AST2600_SILICON_REV2 TO_REG(0x14) #define AST2600_SYS_RST_CTRL TO_REG(0x40) @@ -176,6 +177,7 @@ #define AST2700_SCUIO_UARTCLK_GEN TO_REG(0x330) #define AST2700_SCUIO_HUARTCLK_GEN TO_REG(0x334) #define AST2700_SCUIO_CLK_DUTY_MEAS_RST TO_REG(0x388) +#define AST2700_SCUIO_FREQ_CNT_CTL TO_REG(0x3A0) #define SCU_IO_REGION_SIZE 0x1000 @@ -722,6 +724,8 @@ static void aspeed_ast2600_scu_write(void *opaque, hwaddr offset, int reg = TO_REG(offset); /* Truncate here so bitwise operations below behave as expected */ uint32_t data = data64; + bool prot_data_state = data == ASPEED_SCU_PROT_KEY; + bool unlocked = s->regs[AST2600_PROT_KEY] && s->regs[AST2600_PROT_KEY2]; if (reg >= ASPEED_AST2600_SCU_NR_REGS) { qemu_log_mask(LOG_GUEST_ERROR, @@ -730,15 +734,24 @@ static void aspeed_ast2600_scu_write(void *opaque, hwaddr offset, return; } - if (reg > PROT_KEY && !s->regs[PROT_KEY]) { + if ((reg != AST2600_PROT_KEY && reg != AST2600_PROT_KEY2) && !unlocked) { qemu_log_mask(LOG_GUEST_ERROR, "%s: SCU is locked!\n", __func__); + return; } trace_aspeed_scu_write(offset, size, data); switch (reg) { case AST2600_PROT_KEY: - s->regs[reg] = (data == ASPEED_SCU_PROT_KEY) ? 1 : 0; + /* + * Writing a value to SCU000 updates both protection + * registers; a write to SCU010 updates only the second one.
+ */ + s->regs[AST2600_PROT_KEY] = prot_data_state; + s->regs[AST2600_PROT_KEY2] = prot_data_state; + return; + case AST2600_PROT_KEY2: + s->regs[AST2600_PROT_KEY2] = prot_data_state; return; case AST2600_HW_STRAP1: case AST2600_HW_STRAP2: @@ -1022,6 +1035,10 @@ static void aspeed_ast2700_scuio_write(void *opaque, hwaddr offset, s->regs[reg - 1] ^= data; updated = true; break; + case AST2700_SCUIO_FREQ_CNT_CTL: + s->regs[reg] = deposit32(s->regs[reg], 6, 1, !!(data & BIT(1))); + updated = true; + break; default: qemu_log_mask(LOG_GUEST_ERROR, "%s: Unhandled write at offset 0x%" HWADDR_PRIx "\n", @@ -1066,6 +1083,7 @@ static const uint32_t ast2700_a0_resets_io[ASPEED_AST2700_SCU_NR_REGS] = { [AST2700_SCUIO_UARTCLK_GEN] = 0x00014506, [AST2700_SCUIO_HUARTCLK_GEN] = 0x000145c0, [AST2700_SCUIO_CLK_DUTY_MEAS_RST] = 0x0c9100d2, + [AST2700_SCUIO_FREQ_CNT_CTL] = 0x00000080, }; static void aspeed_2700_scuio_class_init(ObjectClass *klass, const void *data) diff --git a/hw/misc/aspeed_sdmc.c b/hw/misc/aspeed_sdmc.c index f04d993..dff7cc3 100644 --- a/hw/misc/aspeed_sdmc.c +++ b/hw/misc/aspeed_sdmc.c @@ -570,6 +570,9 @@ static void aspeed_2700_sdmc_reset(DeviceState *dev) /* Set ram size bit and defaults values */ s->regs[R_MAIN_CONF] = asc->compute_conf(s, 0); + /* Skipping dram init */ + s->regs[R_MAIN_CONTROL] = BIT(16); + if (s->unlocked) { s->regs[R_2700_PROT] = PROT_UNLOCKED; } diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig index e6a0ac1..fc9c35b 100644 --- a/hw/riscv/Kconfig +++ b/hw/riscv/Kconfig @@ -119,3 +119,12 @@ config SPIKE select HTIF select RISCV_ACLINT select SIFIVE_PLIC + +config XIANGSHAN_KUNMINGHU + bool + default y + depends on RISCV64 + select RISCV_ACLINT + select RISCV_APLIC + select RISCV_IMSIC + select SERIAL_MM diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build index c22f3a7..2a8d5b1 100644 --- a/hw/riscv/meson.build +++ b/hw/riscv/meson.build @@ -13,5 +13,6 @@ riscv_ss.add(when: 'CONFIG_ACPI', if_true: files('virt-acpi-build.c')) 
riscv_ss.add(when: 'CONFIG_RISCV_IOMMU', if_true: files( 'riscv-iommu.c', 'riscv-iommu-pci.c', 'riscv-iommu-sys.c', 'riscv-iommu-hpm.c')) riscv_ss.add(when: 'CONFIG_MICROBLAZE_V', if_true: files('microblaze-v-generic.c')) +riscv_ss.add(when: 'CONFIG_XIANGSHAN_KUNMINGHU', if_true: files('xiangshan_kmh.c')) hw_arch += {'riscv': riscv_ss} diff --git a/hw/riscv/riscv-iommu-bits.h b/hw/riscv/riscv-iommu-bits.h index 1017d73..47fe01b 100644 --- a/hw/riscv/riscv-iommu-bits.h +++ b/hw/riscv/riscv-iommu-bits.h @@ -79,6 +79,7 @@ struct riscv_iommu_pq_record { #define RISCV_IOMMU_CAP_SV39 BIT_ULL(9) #define RISCV_IOMMU_CAP_SV48 BIT_ULL(10) #define RISCV_IOMMU_CAP_SV57 BIT_ULL(11) +#define RISCV_IOMMU_CAP_SVRSW60T59B BIT_ULL(14) #define RISCV_IOMMU_CAP_SV32X4 BIT_ULL(16) #define RISCV_IOMMU_CAP_SV39X4 BIT_ULL(17) #define RISCV_IOMMU_CAP_SV48X4 BIT_ULL(18) diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c index a877e5d..96a7fbd 100644 --- a/hw/riscv/riscv-iommu.c +++ b/hw/riscv/riscv-iommu.c @@ -1935,11 +1935,7 @@ static void riscv_iommu_process_dbg(RISCVIOMMUState *s) iova = RISCV_IOMMU_TR_RESPONSE_FAULT | (((uint64_t) fault) << 10); } else { iova = iotlb.translated_addr & ~iotlb.addr_mask; - iova >>= TARGET_PAGE_BITS; - iova &= RISCV_IOMMU_TR_RESPONSE_PPN; - - /* We do not support superpages (> 4kbs) for now */ - iova &= ~RISCV_IOMMU_TR_RESPONSE_S; + iova = set_field(0, RISCV_IOMMU_TR_RESPONSE_PPN, PPN_DOWN(iova)); } riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE, iova); } @@ -2355,7 +2351,8 @@ static void riscv_iommu_realize(DeviceState *dev, Error **errp) } if (s->enable_g_stage) { s->cap |= RISCV_IOMMU_CAP_SV32X4 | RISCV_IOMMU_CAP_SV39X4 | - RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4; + RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4 | + RISCV_IOMMU_CAP_SVRSW60T59B; } if (s->hpm_cntrs > 0) { diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index cf280a9..47e573f 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -311,8 +311,7 @@ static void 
create_fdt_socket_memory(RISCVVirtState *s, int socket) size = riscv_socket_mem_size(ms, socket); mem_name = g_strdup_printf("/memory@%"HWADDR_PRIx, addr); qemu_fdt_add_subnode(ms->fdt, mem_name); - qemu_fdt_setprop_cells(ms->fdt, mem_name, "reg", - addr >> 32, addr, size >> 32, size); + qemu_fdt_setprop_sized_cells(ms->fdt, mem_name, "reg", 2, addr, 2, size); qemu_fdt_setprop_string(ms->fdt, mem_name, "device_type", "memory"); riscv_socket_fdt_write_id(ms, mem_name, socket); } @@ -324,7 +323,7 @@ static void create_fdt_socket_clint(RISCVVirtState *s, int cpu; g_autofree char *clint_name = NULL; g_autofree uint32_t *clint_cells = NULL; - unsigned long clint_addr; + hwaddr clint_addr; MachineState *ms = MACHINE(s); static const char * const clint_compat[2] = { "sifive,clint0", "riscv,clint0" @@ -340,14 +339,14 @@ static void create_fdt_socket_clint(RISCVVirtState *s, } clint_addr = s->memmap[VIRT_CLINT].base + - (s->memmap[VIRT_CLINT].size * socket); - clint_name = g_strdup_printf("/soc/clint@%lx", clint_addr); + s->memmap[VIRT_CLINT].size * socket; + clint_name = g_strdup_printf("/soc/clint@%"HWADDR_PRIx, clint_addr); qemu_fdt_add_subnode(ms->fdt, clint_name); qemu_fdt_setprop_string_array(ms->fdt, clint_name, "compatible", (char **)&clint_compat, ARRAY_SIZE(clint_compat)); - qemu_fdt_setprop_cells(ms->fdt, clint_name, "reg", - 0x0, clint_addr, 0x0, s->memmap[VIRT_CLINT].size); + qemu_fdt_setprop_sized_cells(ms->fdt, clint_name, "reg", + 2, clint_addr, 2, s->memmap[VIRT_CLINT].size); qemu_fdt_setprop(ms->fdt, clint_name, "interrupts-extended", clint_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 4); riscv_socket_fdt_write_id(ms, clint_name, socket); @@ -388,8 +387,8 @@ static void create_fdt_socket_aclint(RISCVVirtState *s, qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "riscv,aclint-mswi"); - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, addr, 0x0, RISCV_ACLINT_SWI_SIZE); + 
qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", + 2, addr, 2, RISCV_ACLINT_SWI_SIZE); qemu_fdt_setprop(ms->fdt, name, "interrupts-extended", aclint_mswi_cells, aclint_cells_size); qemu_fdt_setprop(ms->fdt, name, "interrupt-controller", NULL, 0); @@ -411,11 +410,11 @@ static void create_fdt_socket_aclint(RISCVVirtState *s, qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "riscv,aclint-mtimer"); - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, addr + RISCV_ACLINT_DEFAULT_MTIME, - 0x0, size - RISCV_ACLINT_DEFAULT_MTIME, - 0x0, addr + RISCV_ACLINT_DEFAULT_MTIMECMP, - 0x0, RISCV_ACLINT_DEFAULT_MTIME); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", + 2, addr + RISCV_ACLINT_DEFAULT_MTIME, + 2, size - RISCV_ACLINT_DEFAULT_MTIME, + 2, addr + RISCV_ACLINT_DEFAULT_MTIMECMP, + 2, RISCV_ACLINT_DEFAULT_MTIME); qemu_fdt_setprop(ms->fdt, name, "interrupts-extended", aclint_mtimer_cells, aclint_cells_size); riscv_socket_fdt_write_id(ms, name, socket); @@ -429,8 +428,8 @@ static void create_fdt_socket_aclint(RISCVVirtState *s, qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "riscv,aclint-sswi"); - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, addr, 0x0, s->memmap[VIRT_ACLINT_SSWI].size); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", + 2, addr, 2, s->memmap[VIRT_ACLINT_SSWI].size); qemu_fdt_setprop(ms->fdt, name, "interrupts-extended", aclint_sswi_cells, aclint_cells_size); qemu_fdt_setprop(ms->fdt, name, "interrupt-controller", NULL, 0); @@ -494,8 +493,8 @@ static void create_fdt_socket_plic(RISCVVirtState *s, s->soc[socket].num_harts * sizeof(uint32_t) * 4); } - qemu_fdt_setprop_cells(ms->fdt, plic_name, "reg", - 0x0, plic_addr, 0x0, s->memmap[VIRT_PLIC].size); + qemu_fdt_setprop_sized_cells(ms->fdt, plic_name, "reg", + 2, plic_addr, 2, s->memmap[VIRT_PLIC].size); qemu_fdt_setprop_cell(ms->fdt, plic_name, "riscv,ndev", VIRT_IRQCHIP_NUM_SOURCES - 1); 
riscv_socket_fdt_write_id(ms, plic_name, socket); @@ -656,8 +655,8 @@ static void create_fdt_one_aplic(RISCVVirtState *s, int socket, qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent", msi_phandle); } - qemu_fdt_setprop_cells(ms->fdt, aplic_name, "reg", - 0x0, aplic_addr, 0x0, aplic_size); + qemu_fdt_setprop_sized_cells(ms->fdt, aplic_name, "reg", + 2, aplic_addr, 2, aplic_size); qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,num-sources", VIRT_IRQCHIP_NUM_SOURCES); @@ -857,9 +856,7 @@ static void create_fdt_virtio(RISCVVirtState *s, uint32_t irq_virtio_phandle) qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "virtio,mmio"); - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, addr, - 0x0, size); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", 2, addr, 2, size); qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent", irq_virtio_phandle); if (s->aia_type == VIRT_AIA_TYPE_NONE) { @@ -897,8 +894,8 @@ static void create_fdt_pcie(RISCVVirtState *s, if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) { qemu_fdt_setprop_cell(ms->fdt, name, "msi-parent", msi_pcie_phandle); } - qemu_fdt_setprop_cells(ms->fdt, name, "reg", 0, - s->memmap[VIRT_PCIE_ECAM].base, 0, s->memmap[VIRT_PCIE_ECAM].size); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", 2, + s->memmap[VIRT_PCIE_ECAM].base, 2, s->memmap[VIRT_PCIE_ECAM].size); qemu_fdt_setprop_sized_cells(ms->fdt, name, "ranges", 1, FDT_PCI_RANGE_IOPORT, 2, 0, 2, s->memmap[VIRT_PCIE_PIO].base, 2, s->memmap[VIRT_PCIE_PIO].size, @@ -935,8 +932,9 @@ static void create_fdt_reset(RISCVVirtState *s, uint32_t *phandle) qemu_fdt_setprop_string_array(ms->fdt, name, "compatible", (char **)&compat, ARRAY_SIZE(compat)); } - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, s->memmap[VIRT_TEST].base, 0x0, s->memmap[VIRT_TEST].size); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", + 2, s->memmap[VIRT_TEST].base, + 2, s->memmap[VIRT_TEST].size); qemu_fdt_setprop_cell(ms->fdt, name, 
"phandle", test_phandle); test_phandle = qemu_fdt_get_phandle(ms->fdt, name); g_free(name); @@ -968,9 +966,9 @@ static void create_fdt_uart(RISCVVirtState *s, s->memmap[VIRT_UART0].base); qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "ns16550a"); - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, s->memmap[VIRT_UART0].base, - 0x0, s->memmap[VIRT_UART0].size); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", + 2, s->memmap[VIRT_UART0].base, + 2, s->memmap[VIRT_UART0].size); qemu_fdt_setprop_cell(ms->fdt, name, "clock-frequency", 3686400); qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent", irq_mmio_phandle); if (s->aia_type == VIRT_AIA_TYPE_NONE) { @@ -994,8 +992,9 @@ static void create_fdt_rtc(RISCVVirtState *s, qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "google,goldfish-rtc"); - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, s->memmap[VIRT_RTC].base, 0x0, s->memmap[VIRT_RTC].size); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", + 2, s->memmap[VIRT_RTC].base, + 2, s->memmap[VIRT_RTC].size); qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent", irq_mmio_phandle); if (s->aia_type == VIRT_AIA_TYPE_NONE) { @@ -1089,8 +1088,7 @@ static void create_fdt_iommu_sys(RISCVVirtState *s, uint32_t irq_chip, qemu_fdt_setprop_cell(fdt, iommu_node, "#iommu-cells", 1); qemu_fdt_setprop_cell(fdt, iommu_node, "phandle", iommu_phandle); - qemu_fdt_setprop_cells(fdt, iommu_node, "reg", - addr >> 32, addr, size >> 32, size); + qemu_fdt_setprop_sized_cells(fdt, iommu_node, "reg", 2, addr, 2, size); qemu_fdt_setprop_cell(fdt, iommu_node, "interrupt-parent", irq_chip); qemu_fdt_setprop_cells(fdt, iommu_node, "interrupts", diff --git a/hw/riscv/xiangshan_kmh.c b/hw/riscv/xiangshan_kmh.c new file mode 100644 index 0000000..a95fd61 --- /dev/null +++ b/hw/riscv/xiangshan_kmh.c @@ -0,0 +1,220 @@ +/* + * QEMU RISC-V Board Compatible with the Xiangshan Kunminghu + * FPGA prototype 
platform + * + * Copyright (c) 2025 Beijing Institute of Open Source Chip (BOSC) + * SPDX-License-Identifier: GPL-2.0-or-later + * + * Provides a board compatible with the Xiangshan Kunminghu + * FPGA prototype platform: + * + * 0) UART (16550A) + * 1) CLINT (Core-Local Interruptor) + * 2) IMSIC (Incoming MSI Controller) + * 3) APLIC (Advanced Platform-Level Interrupt Controller) + * + * More information can be found in our Github repository: + * https://github.com/OpenXiangShan/XiangShan + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "system/address-spaces.h" +#include "hw/boards.h" +#include "hw/char/serial-mm.h" +#include "hw/intc/riscv_aclint.h" +#include "hw/intc/riscv_aplic.h" +#include "hw/intc/riscv_imsic.h" +#include "hw/qdev-properties.h" +#include "hw/riscv/boot.h" +#include "hw/riscv/xiangshan_kmh.h" +#include "hw/riscv/riscv_hart.h" +#include "system/system.h" + +static const MemMapEntry xiangshan_kmh_memmap[] = { + [XIANGSHAN_KMH_ROM] = { 0x1000, 0xF000 }, + [XIANGSHAN_KMH_UART0] = { 0x310B0000, 0x10000 }, + [XIANGSHAN_KMH_CLINT] = { 0x38000000, 0x10000 }, + [XIANGSHAN_KMH_APLIC_M] = { 0x31100000, 0x4000 }, + [XIANGSHAN_KMH_APLIC_S] = { 0x31120000, 0x4000 }, + [XIANGSHAN_KMH_IMSIC_M] = { 0x3A800000, 0x10000 }, + [XIANGSHAN_KMH_IMSIC_S] = { 0x3B000000, 0x80000 }, + [XIANGSHAN_KMH_DRAM] = { 0x80000000, 0x0 }, +}; + +static DeviceState *xiangshan_kmh_create_aia(uint32_t num_harts) +{ + int i; + const MemMapEntry *memmap = xiangshan_kmh_memmap; + hwaddr addr = 0; + DeviceState *aplic_m = NULL; + + /* M-level IMSICs */ + addr = memmap[XIANGSHAN_KMH_IMSIC_M].base; + for (i = 0; i < num_harts; i++) { + riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0), i, true, + 1, XIANGSHAN_KMH_IMSIC_NUM_IDS); + } + + /* S-level IMSICs */ + addr = memmap[XIANGSHAN_KMH_IMSIC_S].base; + for (i = 0; i < num_harts; i++) { + riscv_imsic_create(addr + + i * IMSIC_HART_SIZE(XIANGSHAN_KMH_IMSIC_GUEST_BITS), + i, false, 1 + XIANGSHAN_KMH_IMSIC_GUEST_BITS, + XIANGSHAN_KMH_IMSIC_NUM_IDS); + } + + /* M-level APLIC */ + aplic_m = riscv_aplic_create(memmap[XIANGSHAN_KMH_APLIC_M].base, + memmap[XIANGSHAN_KMH_APLIC_M].size, + 0, 0, XIANGSHAN_KMH_APLIC_NUM_SOURCES, + 1, true, true, NULL); + + /* S-level APLIC */ + riscv_aplic_create(memmap[XIANGSHAN_KMH_APLIC_S].base, + memmap[XIANGSHAN_KMH_APLIC_S].size, + 0, 0, XIANGSHAN_KMH_APLIC_NUM_SOURCES, + 1, true, false, aplic_m); + + return aplic_m; +} + +static void xiangshan_kmh_soc_realize(DeviceState *dev, 
Error **errp) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + XiangshanKmhSoCState *s = XIANGSHAN_KMH_SOC(dev); + const MemMapEntry *memmap = xiangshan_kmh_memmap; + MemoryRegion *system_memory = get_system_memory(); + uint32_t num_harts = ms->smp.cpus; + + qdev_prop_set_uint32(DEVICE(&s->cpus), "num-harts", num_harts); + qdev_prop_set_uint32(DEVICE(&s->cpus), "hartid-base", 0); + qdev_prop_set_string(DEVICE(&s->cpus), "cpu-type", + TYPE_RISCV_CPU_XIANGSHAN_KMH); + sysbus_realize(SYS_BUS_DEVICE(&s->cpus), &error_fatal); + + /* AIA */ + s->irqchip = xiangshan_kmh_create_aia(num_harts); + + /* UART */ + serial_mm_init(system_memory, memmap[XIANGSHAN_KMH_UART0].base, 2, + qdev_get_gpio_in(s->irqchip, XIANGSHAN_KMH_UART0_IRQ), + 115200, serial_hd(0), DEVICE_LITTLE_ENDIAN); + + /* CLINT */ + riscv_aclint_swi_create(memmap[XIANGSHAN_KMH_CLINT].base, + 0, num_harts, false); + riscv_aclint_mtimer_create(memmap[XIANGSHAN_KMH_CLINT].base + + RISCV_ACLINT_SWI_SIZE, + RISCV_ACLINT_DEFAULT_MTIMER_SIZE, + 0, num_harts, RISCV_ACLINT_DEFAULT_MTIMECMP, + RISCV_ACLINT_DEFAULT_MTIME, + XIANGSHAN_KMH_CLINT_TIMEBASE_FREQ, true); + + /* ROM */ + memory_region_init_rom(&s->rom, OBJECT(dev), "xiangshan.kunminghu.rom", + memmap[XIANGSHAN_KMH_ROM].size, &error_fatal); + memory_region_add_subregion(system_memory, + memmap[XIANGSHAN_KMH_ROM].base, &s->rom); +} + +static void xiangshan_kmh_soc_class_init(ObjectClass *klass, const void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = xiangshan_kmh_soc_realize; + dc->user_creatable = false; +} + +static void xiangshan_kmh_soc_instance_init(Object *obj) +{ + XiangshanKmhSoCState *s = XIANGSHAN_KMH_SOC(obj); + + object_initialize_child(obj, "cpus", &s->cpus, TYPE_RISCV_HART_ARRAY); +} + +static const TypeInfo xiangshan_kmh_soc_info = { + .name = TYPE_XIANGSHAN_KMH_SOC, + .parent = TYPE_DEVICE, + .instance_size = sizeof(XiangshanKmhSoCState), + .instance_init = xiangshan_kmh_soc_instance_init, + .class_init = 
xiangshan_kmh_soc_class_init, +}; + +static void xiangshan_kmh_soc_register_types(void) +{ + type_register_static(&xiangshan_kmh_soc_info); +} +type_init(xiangshan_kmh_soc_register_types) + +static void xiangshan_kmh_machine_init(MachineState *machine) +{ + XiangshanKmhState *s = XIANGSHAN_KMH_MACHINE(machine); + const MemMapEntry *memmap = xiangshan_kmh_memmap; + MemoryRegion *system_memory = get_system_memory(); + hwaddr start_addr = memmap[XIANGSHAN_KMH_DRAM].base; + + /* Initialize SoC */ + object_initialize_child(OBJECT(machine), "soc", &s->soc, + TYPE_XIANGSHAN_KMH_SOC); + qdev_realize(DEVICE(&s->soc), NULL, &error_fatal); + + /* Register RAM */ + memory_region_add_subregion(system_memory, + memmap[XIANGSHAN_KMH_DRAM].base, + machine->ram); + + /* ROM reset vector */ + riscv_setup_rom_reset_vec(machine, &s->soc.cpus, + start_addr, + memmap[XIANGSHAN_KMH_ROM].base, + memmap[XIANGSHAN_KMH_ROM].size, 0, 0); + if (machine->firmware) { + riscv_load_firmware(machine->firmware, &start_addr, NULL); + } + + /* Note: dtb has been integrated into firmware(OpenSBI) when compiling */ +} + +static void xiangshan_kmh_machine_class_init(ObjectClass *klass, const void *data) +{ + MachineClass *mc = MACHINE_CLASS(klass); + static const char *const valid_cpu_types[] = { + TYPE_RISCV_CPU_XIANGSHAN_KMH, + NULL + }; + + mc->desc = "RISC-V Board compatible with the Xiangshan " \ + "Kunminghu FPGA prototype platform"; + mc->init = xiangshan_kmh_machine_init; + mc->max_cpus = XIANGSHAN_KMH_MAX_CPUS; + mc->default_cpu_type = TYPE_RISCV_CPU_XIANGSHAN_KMH; + mc->valid_cpu_types = valid_cpu_types; + mc->default_ram_id = "xiangshan.kunminghu.ram"; +} + +static const TypeInfo xiangshan_kmh_machine_info = { + .name = TYPE_XIANGSHAN_KMH_MACHINE, + .parent = TYPE_MACHINE, + .instance_size = sizeof(XiangshanKmhState), + .class_init = xiangshan_kmh_machine_class_init, +}; + +static void xiangshan_kmh_machine_register_types(void) +{ + type_register_static(&xiangshan_kmh_machine_info); +} 
+type_init(xiangshan_kmh_machine_register_types) diff --git a/hw/vfio-user/container.c b/hw/vfio-user/container.c index 3133fef..d318e6a 100644 --- a/hw/vfio-user/container.c +++ b/hw/vfio-user/container.c @@ -13,7 +13,6 @@ #include "hw/vfio-user/container.h" #include "hw/vfio-user/device.h" #include "hw/vfio-user/trace.h" -#include "hw/vfio/vfio-cpr.h" #include "hw/vfio/vfio-device.h" #include "hw/vfio/vfio-listener.h" #include "qapi/error.h" @@ -225,14 +224,10 @@ vfio_user_container_connect(AddressSpace *as, VFIODevice *vbasedev, bcontainer = &container->bcontainer; - if (!vfio_cpr_register_container(bcontainer, errp)) { - goto free_container_exit; - } - ret = ram_block_uncoordinated_discard_disable(true); if (ret) { error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken"); - goto unregister_container_exit; + goto free_container_exit; } vioc = VFIO_IOMMU_GET_CLASS(bcontainer); @@ -261,9 +256,6 @@ listener_release_exit: enable_discards_exit: ram_block_uncoordinated_discard_disable(false); -unregister_container_exit: - vfio_cpr_unregister_container(bcontainer); - free_container_exit: object_unref(container); @@ -286,7 +278,6 @@ static void vfio_user_container_disconnect(VFIOUserContainer *container) vioc->release(bcontainer); } - vfio_cpr_unregister_container(bcontainer); object_unref(container); vfio_address_space_put(space); diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c index 1df4438..7719f24 100644 --- a/hw/vfio/ap.c +++ b/hw/vfio/ap.c @@ -265,7 +265,7 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp) error: error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name); - g_free(vbasedev->name); + vfio_device_free_name(vbasedev); } static void vfio_ap_unrealize(DeviceState *dev) @@ -275,7 +275,7 @@ static void vfio_ap_unrealize(DeviceState *dev) vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_REQ_IRQ_INDEX); vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_CFG_CHG_IRQ_INDEX); vfio_device_detach(&vapdev->vdev); - g_free(vapdev->vdev.name); + 
vfio_device_free_name(&vapdev->vdev); } static const Property vfio_ap_properties[] = { diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c index cea9d6e..9560b8d 100644 --- a/hw/vfio/ccw.c +++ b/hw/vfio/ccw.c @@ -619,7 +619,7 @@ out_io_notifier_err: out_region_err: vfio_device_detach(vbasedev); out_attach_dev_err: - g_free(vbasedev->name); + vfio_device_free_name(vbasedev); out_unrealize: if (cdc->unrealize) { cdc->unrealize(cdev); @@ -637,7 +637,7 @@ static void vfio_ccw_unrealize(DeviceState *dev) vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX); vfio_ccw_put_region(vcdev); vfio_device_detach(&vcdev->vdev); - g_free(vcdev->vdev.name); + vfio_device_free_name(&vcdev->vdev); if (cdc->unrealize) { cdc->unrealize(cdev); diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c index d834bd4..5630497 100644 --- a/hw/vfio/container-base.c +++ b/hw/vfio/container-base.c @@ -78,7 +78,16 @@ int vfio_container_dma_map(VFIOContainerBase *bcontainer, void *vaddr, bool readonly, MemoryRegion *mr) { VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + RAMBlock *rb = mr->ram_block; + int mfd = rb ? qemu_ram_get_fd(rb) : -1; + if (mfd >= 0 && vioc->dma_map_file) { + unsigned long start = vaddr - qemu_ram_get_host_addr(rb); + unsigned long offset = qemu_ram_get_fd_offset(rb); + + return vioc->dma_map_file(bcontainer, iova, size, mfd, start + offset, + readonly); + } g_assert(vioc->dma_map); return vioc->dma_map(bcontainer, iova, size, vaddr, readonly, mr); } diff --git a/hw/vfio/cpr-iommufd.c b/hw/vfio/cpr-iommufd.c new file mode 100644 index 0000000..148a06d --- /dev/null +++ b/hw/vfio/cpr-iommufd.c @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2024-2025 Oracle and/or its affiliates. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "hw/vfio/vfio-cpr.h" +#include "hw/vfio/vfio-device.h" +#include "migration/blocker.h" +#include "migration/cpr.h" +#include "migration/migration.h" +#include "migration/vmstate.h" +#include "system/iommufd.h" +#include "vfio-iommufd.h" +#include "trace.h" + +typedef struct CprVFIODevice { + char *name; + unsigned int namelen; + uint32_t ioas_id; + int devid; + uint32_t hwpt_id; + QLIST_ENTRY(CprVFIODevice) next; +} CprVFIODevice; + +static const VMStateDescription vmstate_cpr_vfio_device = { + .name = "cpr vfio device", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(namelen, CprVFIODevice), + VMSTATE_VBUFFER_ALLOC_UINT32(name, CprVFIODevice, 0, NULL, namelen), + VMSTATE_INT32(devid, CprVFIODevice), + VMSTATE_UINT32(ioas_id, CprVFIODevice), + VMSTATE_UINT32(hwpt_id, CprVFIODevice), + VMSTATE_END_OF_LIST() + } +}; + +const VMStateDescription vmstate_cpr_vfio_devices = { + .name = CPR_STATE "/vfio devices", + .version_id = 1, + .minimum_version_id = 1, + .fields = (const VMStateField[]){ + VMSTATE_QLIST_V(vfio_devices, CprState, 1, vmstate_cpr_vfio_device, + CprVFIODevice, next), + VMSTATE_END_OF_LIST() + } +}; + +static void vfio_cpr_save_device(VFIODevice *vbasedev) +{ + CprVFIODevice *elem = g_new0(CprVFIODevice, 1); + + elem->name = g_strdup(vbasedev->name); + elem->namelen = strlen(vbasedev->name) + 1; + elem->ioas_id = vbasedev->cpr.ioas_id; + elem->devid = vbasedev->devid; + elem->hwpt_id = vbasedev->cpr.hwpt_id; + QLIST_INSERT_HEAD(&cpr_state.vfio_devices, elem, next); +} + +static CprVFIODevice *find_device(const char *name) +{ + CprVFIODeviceList *head = &cpr_state.vfio_devices; + CprVFIODevice *elem; + + QLIST_FOREACH(elem, head, next) { + if (!strcmp(elem->name, name)) { + return elem; + } + } + return NULL; +} + +static void vfio_cpr_delete_device(const char 
*name) +{ + CprVFIODevice *elem = find_device(name); + + if (elem) { + QLIST_REMOVE(elem, next); + g_free(elem->name); + g_free(elem); + } +} + +static bool vfio_cpr_find_device(VFIODevice *vbasedev) +{ + CprVFIODevice *elem = find_device(vbasedev->name); + + if (elem) { + vbasedev->cpr.ioas_id = elem->ioas_id; + vbasedev->devid = elem->devid; + vbasedev->cpr.hwpt_id = elem->hwpt_id; + trace_vfio_cpr_find_device(elem->ioas_id, elem->devid, elem->hwpt_id); + return true; + } + return false; +} + +static bool vfio_cpr_supported(IOMMUFDBackend *be, Error **errp) +{ + if (!iommufd_change_process_capable(be)) { + if (errp) { + error_setg(errp, "vfio iommufd backend does not support " + "IOMMU_IOAS_CHANGE_PROCESS"); + } + return false; + } + return true; +} + +static int iommufd_cpr_pre_save(void *opaque) +{ + IOMMUFDBackend *be = opaque; + + /* + * The process has not changed yet, but proactively try the ioctl, + * and it will fail if any DMA mappings are not supported. + */ + if (!iommufd_change_process_capable(be)) { + error_report("some memory regions do not support " + "IOMMU_IOAS_CHANGE_PROCESS"); + return -1; + } + return 0; +} + +static int iommufd_cpr_post_load(void *opaque, int version_id) +{ + IOMMUFDBackend *be = opaque; + Error *local_err = NULL; + + if (!iommufd_change_process(be, &local_err)) { + error_report_err(local_err); + return -1; + } + return 0; +} + +static const VMStateDescription iommufd_cpr_vmstate = { + .name = "iommufd", + .version_id = 0, + .minimum_version_id = 0, + .pre_save = iommufd_cpr_pre_save, + .post_load = iommufd_cpr_post_load, + .needed = cpr_incoming_needed, + .fields = (VMStateField[]) { + VMSTATE_END_OF_LIST() + } +}; + +bool vfio_iommufd_cpr_register_iommufd(IOMMUFDBackend *be, Error **errp) +{ + Error **cpr_blocker = &be->cpr_blocker; + + if (!vfio_cpr_supported(be, cpr_blocker)) { + return migrate_add_blocker_modes(cpr_blocker, errp, + MIG_MODE_CPR_TRANSFER, -1) == 0; + } + + vmstate_register(NULL, -1, &iommufd_cpr_vmstate, 
be); + + return true; +} + +void vfio_iommufd_cpr_unregister_iommufd(IOMMUFDBackend *be) +{ + vmstate_unregister(NULL, &iommufd_cpr_vmstate, be); + migrate_del_blocker(&be->cpr_blocker); +} + +bool vfio_iommufd_cpr_register_container(VFIOIOMMUFDContainer *container, + Error **errp) +{ + VFIOContainerBase *bcontainer = &container->bcontainer; + + migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier, + vfio_cpr_reboot_notifier, + MIG_MODE_CPR_REBOOT); + + vfio_cpr_add_kvm_notifier(); + + return true; +} + +void vfio_iommufd_cpr_unregister_container(VFIOIOMMUFDContainer *container) +{ + VFIOContainerBase *bcontainer = &container->bcontainer; + + migration_remove_notifier(&bcontainer->cpr_reboot_notifier); +} + +void vfio_iommufd_cpr_register_device(VFIODevice *vbasedev) +{ + if (!cpr_is_incoming()) { + /* + * Beware fd may have already been saved by vfio_device_set_fd, + * so call resave to avoid a duplicate entry. + */ + cpr_resave_fd(vbasedev->name, 0, vbasedev->fd); + vfio_cpr_save_device(vbasedev); + } +} + +void vfio_iommufd_cpr_unregister_device(VFIODevice *vbasedev) +{ + cpr_delete_fd(vbasedev->name, 0); + vfio_cpr_delete_device(vbasedev->name); +} + +void vfio_cpr_load_device(VFIODevice *vbasedev) +{ + if (cpr_is_incoming()) { + bool ret = vfio_cpr_find_device(vbasedev); + g_assert(ret); + + if (vbasedev->fd < 0) { + vbasedev->fd = cpr_find_fd(vbasedev->name, 0); + } + } +} diff --git a/hw/vfio/cpr-legacy.c b/hw/vfio/cpr-legacy.c index a84c324..553b203 100644 --- a/hw/vfio/cpr-legacy.c +++ b/hw/vfio/cpr-legacy.c @@ -99,20 +99,21 @@ static int vfio_container_post_load(void *opaque, int version_id) { VFIOContainer *container = opaque; VFIOContainerBase *bcontainer = &container->bcontainer; - VFIOGroup *group; + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + dma_map_fn saved_dma_map = vioc->dma_map; Error *local_err = NULL; + /* During incoming CPR, divert calls to dma_map. 
*/ + vioc->dma_map = vfio_legacy_cpr_dma_map; + if (!vfio_listener_register(bcontainer, &local_err)) { error_report_err(local_err); return -1; } - QLIST_FOREACH(group, &container->group_list, container_next) { - VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + /* Restore original dma_map function */ + vioc->dma_map = saved_dma_map; - /* Restore original dma_map function */ - vioc->dma_map = container->cpr.saved_dma_map; - } return 0; } @@ -148,6 +149,7 @@ static int vfio_cpr_fail_notifier(NotifierWithReturn *notifier, */ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + dma_map_fn saved_dma_map = vioc->dma_map; vioc->dma_map = vfio_legacy_cpr_dma_map; container->cpr.remap_listener = (MemoryListener) { @@ -158,7 +160,7 @@ static int vfio_cpr_fail_notifier(NotifierWithReturn *notifier, bcontainer->space->as); memory_listener_unregister(&container->cpr.remap_listener); container->cpr.vaddr_unmapped = false; - vioc->dma_map = container->cpr.saved_dma_map; + vioc->dma_map = saved_dma_map; } return 0; } @@ -177,14 +179,9 @@ bool vfio_legacy_cpr_register_container(VFIOContainer *container, Error **errp) MIG_MODE_CPR_TRANSFER, -1) == 0; } - vmstate_register(NULL, -1, &vfio_container_vmstate, container); + vfio_cpr_add_kvm_notifier(); - /* During incoming CPR, divert calls to dma_map. 
*/ - if (cpr_is_incoming()) { - VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); - container->cpr.saved_dma_map = vioc->dma_map; - vioc->dma_map = vfio_legacy_cpr_dma_map; - } + vmstate_register(NULL, -1, &vfio_container_vmstate, container); migration_add_notifier_mode(&container->cpr.transfer_notifier, vfio_cpr_fail_notifier, diff --git a/hw/vfio/cpr.c b/hw/vfio/cpr.c index fdbb58e..af0f12a 100644 --- a/hw/vfio/cpr.c +++ b/hw/vfio/cpr.c @@ -9,6 +9,8 @@ #include "hw/vfio/vfio-device.h" #include "hw/vfio/vfio-cpr.h" #include "hw/vfio/pci.h" +#include "hw/pci/msix.h" +#include "hw/pci/msi.h" #include "migration/cpr.h" #include "qapi/error.h" #include "system/runstate.h" @@ -27,17 +29,67 @@ int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier, return 0; } -bool vfio_cpr_register_container(VFIOContainerBase *bcontainer, Error **errp) +#define STRDUP_VECTOR_FD_NAME(vdev, name) \ + g_strdup_printf("%s_%s", (vdev)->vbasedev.name, (name)) + +void vfio_cpr_save_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr, + int fd) +{ + g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name); + cpr_save_fd(fdname, nr, fd); +} + +int vfio_cpr_load_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr) +{ + g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name); + return cpr_find_fd(fdname, nr); +} + +void vfio_cpr_delete_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr) { - migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier, - vfio_cpr_reboot_notifier, - MIG_MODE_CPR_REBOOT); - return true; + g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name); + cpr_delete_fd(fdname, nr); } -void vfio_cpr_unregister_container(VFIOContainerBase *bcontainer) +static void vfio_cpr_claim_vectors(VFIOPCIDevice *vdev, int nr_vectors, + bool msix) { - migration_remove_notifier(&bcontainer->cpr_reboot_notifier); + int i, fd; + bool pending = false; + PCIDevice *pdev = &vdev->pdev; + + vdev->nr_vectors = nr_vectors; + vdev->msi_vectors = 
g_new0(VFIOMSIVector, nr_vectors); + vdev->interrupt = msix ? VFIO_INT_MSIX : VFIO_INT_MSI; + + vfio_pci_prepare_kvm_msi_virq_batch(vdev); + + for (i = 0; i < nr_vectors; i++) { + VFIOMSIVector *vector = &vdev->msi_vectors[i]; + + fd = vfio_cpr_load_vector_fd(vdev, "interrupt", i); + if (fd >= 0) { + vfio_pci_vector_init(vdev, i); + vfio_pci_msi_set_handler(vdev, i); + } + + if (vfio_cpr_load_vector_fd(vdev, "kvm_interrupt", i) >= 0) { + vfio_pci_add_kvm_msi_virq(vdev, vector, i, msix); + } else { + vdev->msi_vectors[i].virq = -1; + } + + if (msix && msix_is_pending(pdev, i) && msix_is_masked(pdev, i)) { + set_bit(i, vdev->msix->pending); + pending = true; + } + } + + vfio_pci_commit_kvm_msi_virq_batch(vdev); + + if (msix) { + memory_region_set_enabled(&pdev->msix_pba_mmio, pending); + } } /* @@ -58,13 +110,91 @@ static int vfio_cpr_pci_pre_load(void *opaque) return 0; } +static int vfio_cpr_pci_post_load(void *opaque, int version_id) +{ + VFIOPCIDevice *vdev = opaque; + PCIDevice *pdev = &vdev->pdev; + int nr_vectors; + + if (msix_enabled(pdev)) { + vfio_pci_msix_set_notifiers(vdev); + nr_vectors = vdev->msix->entries; + vfio_cpr_claim_vectors(vdev, nr_vectors, true); + + } else if (msi_enabled(pdev)) { + nr_vectors = msi_nr_vectors_allocated(pdev); + vfio_cpr_claim_vectors(vdev, nr_vectors, false); + + } else if (vfio_pci_read_config(pdev, PCI_INTERRUPT_PIN, 1)) { + Error *local_err = NULL; + if (!vfio_pci_intx_enable(vdev, &local_err)) { + error_report_err(local_err); + return -1; + } + } + + return 0; +} + +static bool pci_msix_present(void *opaque, int version_id) +{ + PCIDevice *pdev = opaque; + + return msix_present(pdev); +} + +static const VMStateDescription vfio_intx_vmstate = { + .name = "vfio-cpr-intx", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_BOOL(pending, VFIOINTx), + VMSTATE_UINT32(route.mode, VFIOINTx), + VMSTATE_INT32(route.irq, VFIOINTx), + VMSTATE_END_OF_LIST() + } +}; + +#define 
VMSTATE_VFIO_INTX(_field, _state) { \ + .name = (stringify(_field)), \ + .size = sizeof(VFIOINTx), \ + .vmsd = &vfio_intx_vmstate, \ + .flags = VMS_STRUCT, \ + .offset = vmstate_offset_value(_state, _field, VFIOINTx), \ +} + const VMStateDescription vfio_cpr_pci_vmstate = { .name = "vfio-cpr-pci", .version_id = 0, .minimum_version_id = 0, .pre_load = vfio_cpr_pci_pre_load, + .post_load = vfio_cpr_pci_post_load, .needed = cpr_incoming_needed, .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(pdev, VFIOPCIDevice), + VMSTATE_MSIX_TEST(pdev, VFIOPCIDevice, pci_msix_present), + VMSTATE_VFIO_INTX(intx, VFIOPCIDevice), VMSTATE_END_OF_LIST() } }; + +static NotifierWithReturn kvm_close_notifier; + +static int vfio_cpr_kvm_close_notifier(NotifierWithReturn *notifier, + MigrationEvent *e, + Error **errp) +{ + if (e->type == MIG_EVENT_PRECOPY_DONE) { + vfio_kvm_device_close(); + } + return 0; +} + +void vfio_cpr_add_kvm_notifier(void) +{ + if (!kvm_close_notifier.notify) { + migration_add_notifier_mode(&kvm_close_notifier, + vfio_cpr_kvm_close_notifier, + MIG_MODE_CPR_TRANSFER); + } +} diff --git a/hw/vfio/device.c b/hw/vfio/device.c index d91c695..96cf214 100644 --- a/hw/vfio/device.c +++ b/hw/vfio/device.c @@ -28,6 +28,8 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "qemu/units.h" +#include "migration/cpr.h" +#include "migration/blocker.h" #include "monitor/monitor.h" #include "vfio-helpers.h" @@ -316,28 +318,40 @@ bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp) error_setg(errp, "Use FD passing only with iommufd backend"); return false; } - /* - * Give a name with fd so any function printing out vbasedev->name - * will not break. - */ if (!vbasedev->name) { - vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd); + + if (vbasedev->dev->id) { + vbasedev->name = g_strdup(vbasedev->dev->id); + return true; + } else { + /* + * Assign a name so any function printing it will not break. 
+ * The fd number changes across processes, so this cannot be + * used as an invariant name for CPR. + */ + vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd); + error_setg(&vbasedev->cpr.id_blocker, + "vfio device with fd=%d needs an id property", + vbasedev->fd); + return migrate_add_blocker_modes(&vbasedev->cpr.id_blocker, + errp, MIG_MODE_CPR_TRANSFER, + -1) == 0; + } } } return true; } -void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) +void vfio_device_free_name(VFIODevice *vbasedev) { - ERRP_GUARD(); - int fd = monitor_fd_param(monitor_cur(), str, errp); + g_clear_pointer(&vbasedev->name, g_free); + migrate_del_blocker(&vbasedev->cpr.id_blocker); +} - if (fd < 0) { - error_prepend(errp, "Could not parse remote object fd %s:", str); - return; - } - vbasedev->fd = fd; +void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) +{ + vbasedev->fd = cpr_get_fd_param(vbasedev->dev->id, str, 0, errp); } static VFIODeviceIOOps vfio_device_io_ops_ioctl; diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c index d0dbab1..9a5f621 100644 --- a/hw/vfio/helpers.c +++ b/hw/vfio/helpers.c @@ -117,6 +117,17 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, int vfio_kvm_device_fd = -1; #endif +void vfio_kvm_device_close(void) +{ +#ifdef CONFIG_KVM + kvm_close(); + if (vfio_kvm_device_fd != -1) { + close(vfio_kvm_device_fd); + vfio_kvm_device_fd = -1; + } +#endif +} + int vfio_kvm_device_add_fd(int fd, Error **errp) { #ifdef CONFIG_KVM diff --git a/hw/vfio/iommufd-stubs.c b/hw/vfio/iommufd-stubs.c new file mode 100644 index 0000000..0be5276 --- /dev/null +++ b/hw/vfio/iommufd-stubs.c @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2025 Oracle and/or its affiliates. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "migration/cpr.h" +#include "migration/vmstate.h" + +const VMStateDescription vmstate_cpr_vfio_devices = { + .name = CPR_STATE "/vfio devices", + .version_id = 1, + .minimum_version_id = 1, + .fields = (const VMStateField[]){ + VMSTATE_END_OF_LIST() + } +}; diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index d3efef7..48c590b 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -25,6 +25,7 @@ #include "system/reset.h" #include "qemu/cutils.h" #include "qemu/chardev_open.h" +#include "migration/cpr.h" #include "pci.h" #include "vfio-iommufd.h" #include "vfio-helpers.h" @@ -45,6 +46,18 @@ static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova, iova, size, vaddr, readonly); } +static int iommufd_cdev_map_file(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + int fd, unsigned long start, bool readonly) +{ + const VFIOIOMMUFDContainer *container = + container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); + + return iommufd_backend_map_file_dma(container->be, + container->ioas_id, + iova, size, fd, start, readonly); +} + static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, IOMMUTLBEntry *iotlb, bool unmap_all) @@ -109,6 +122,10 @@ static bool iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp) goto err_kvm_device_add; } + if (cpr_is_incoming()) { + goto skip_bind; + } + /* Bind device to iommufd */ bind.iommufd = iommufd->fd; if (ioctl(vbasedev->fd, VFIO_DEVICE_BIND_IOMMUFD, &bind)) { @@ -120,6 +137,8 @@ static bool iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp) vbasedev->devid = bind.out_devid; trace_iommufd_cdev_connect_and_bind(bind.iommufd, vbasedev->name, vbasedev->fd, vbasedev->devid); + +skip_bind: return true; err_bind: iommufd_cdev_kvm_device_del(vbasedev); @@ -313,7 +332,14 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev, 
/* Try to find a domain */ QLIST_FOREACH(hwpt, &container->hwpt_list, next) { - ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp); + if (!cpr_is_incoming()) { + ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp); + } else if (vbasedev->cpr.hwpt_id == hwpt->hwpt_id) { + ret = 0; + } else { + continue; + } + if (ret) { /* -EINVAL means the domain is incompatible with the device. */ if (ret == -EINVAL) { @@ -330,6 +356,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev, return false; } else { vbasedev->hwpt = hwpt; + vbasedev->cpr.hwpt_id = hwpt->hwpt_id; QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next); vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt); return true; @@ -352,6 +379,11 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev, flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING; } + if (cpr_is_incoming()) { + hwpt_id = vbasedev->cpr.hwpt_id; + goto skip_alloc; + } + if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid, container->ioas_id, flags, IOMMU_HWPT_DATA_NONE, 0, NULL, @@ -359,19 +391,20 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev, return false; } + ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt_id, errp); + if (ret) { + iommufd_backend_free_id(container->be, hwpt_id); + return false; + } + +skip_alloc: hwpt = g_malloc0(sizeof(*hwpt)); hwpt->hwpt_id = hwpt_id; hwpt->hwpt_flags = flags; QLIST_INIT(&hwpt->device_list); - ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp); - if (ret) { - iommufd_backend_free_id(container->be, hwpt->hwpt_id); - g_free(hwpt); - return false; - } - vbasedev->hwpt = hwpt; + vbasedev->cpr.hwpt_id = hwpt->hwpt_id; vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt); QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next); QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next); @@ -409,7 +442,9 @@ static bool iommufd_cdev_attach_container(VFIODevice *vbasedev, return 
iommufd_cdev_autodomains_get(vbasedev, container, errp); } - return !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp); + /* If CPR, we are already attached to ioas_id. */ + return cpr_is_incoming() || + !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp); } static void iommufd_cdev_detach_container(VFIODevice *vbasedev, @@ -434,7 +469,7 @@ static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container) if (!QLIST_EMPTY(&bcontainer->device_list)) { return; } - vfio_cpr_unregister_container(bcontainer); + vfio_iommufd_cpr_unregister_container(container); vfio_listener_unregister(bcontainer); iommufd_backend_free_id(container->be, container->ioas_id); object_unref(container); @@ -498,11 +533,14 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, VFIOAddressSpace *space; struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) }; int ret, devfd; + bool res; uint32_t ioas_id; Error *err = NULL; const VFIOIOMMUClass *iommufd_vioc = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); + vfio_cpr_load_device(vbasedev); + if (vbasedev->fd < 0) { devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); if (devfd < 0) { @@ -526,7 +564,16 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, vbasedev->iommufd != container->be) { continue; } - if (!iommufd_cdev_attach_container(vbasedev, container, &err)) { + + if (!cpr_is_incoming()) { + res = iommufd_cdev_attach_container(vbasedev, container, &err); + } else if (vbasedev->cpr.ioas_id == container->ioas_id) { + res = true; + } else { + continue; + } + + if (!res) { const char *msg = error_get_pretty(err); trace_iommufd_cdev_fail_attach_existing_container(msg); @@ -543,6 +590,11 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, } } + if (cpr_is_incoming()) { + ioas_id = vbasedev->cpr.ioas_id; + goto skip_ioas_alloc; + } + /* Need to allocate a new dedicated container */ if 
(!iommufd_backend_alloc_ioas(vbasedev->iommufd, &ioas_id, errp)) { goto err_alloc_ioas; @@ -550,10 +602,12 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, trace_iommufd_cdev_alloc_ioas(vbasedev->iommufd->fd, ioas_id); +skip_ioas_alloc: container = VFIO_IOMMU_IOMMUFD(object_new(TYPE_VFIO_IOMMU_IOMMUFD)); container->be = vbasedev->iommufd; container->ioas_id = ioas_id; QLIST_INIT(&container->hwpt_list); + vbasedev->cpr.ioas_id = ioas_id; bcontainer = &container->bcontainer; vfio_address_space_insert(space, bcontainer); @@ -580,7 +634,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, goto err_listener_register; } - if (!vfio_cpr_register_container(bcontainer, errp)) { + if (!vfio_iommufd_cpr_register_container(container, errp)) { goto err_listener_register; } @@ -611,6 +665,7 @@ found_container: } vfio_device_prepare(vbasedev, bcontainer, &dev_info); + vfio_iommufd_cpr_register_device(vbasedev); trace_iommufd_cdev_device_info(vbasedev->name, devfd, vbasedev->num_irqs, vbasedev->num_regions, vbasedev->flags); @@ -648,6 +703,7 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev) iommufd_cdev_container_destroy(container); vfio_address_space_put(space); + vfio_iommufd_cpr_unregister_device(vbasedev); iommufd_cdev_unbind_and_disconnect(vbasedev); close(vbasedev->fd); } @@ -807,6 +863,7 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, const void *data) VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); vioc->dma_map = iommufd_cdev_map; + vioc->dma_map_file = iommufd_cdev_map_file; vioc->dma_unmap = iommufd_cdev_unmap; vioc->attach_device = iommufd_cdev_attach; vioc->detach_device = iommufd_cdev_detach; diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build index 63ea393..bfaf6be 100644 --- a/hw/vfio/meson.build +++ b/hw/vfio/meson.build @@ -31,7 +31,9 @@ system_ss.add(when: 'CONFIG_VFIO', if_true: files( )) system_ss.add(when: ['CONFIG_VFIO', 'CONFIG_IOMMUFD'], if_true: files( 'iommufd.c', + 
'cpr-iommufd.c', )) +system_ss.add(when: 'CONFIG_IOMMUFD', if_false: files('iommufd-stubs.c')) system_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files( 'display.c', )) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index fa25bde..1093b28 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -29,6 +29,7 @@ #include "hw/pci/pci_bridge.h" #include "hw/qdev-properties.h" #include "hw/qdev-properties-system.h" +#include "hw/vfio/vfio-cpr.h" #include "migration/vmstate.h" #include "migration/cpr.h" #include "qobject/qdict.h" @@ -57,20 +58,33 @@ static void vfio_disable_interrupts(VFIOPCIDevice *vdev); static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); static void vfio_msi_disable_common(VFIOPCIDevice *vdev); +/* Create new or reuse existing eventfd */ static bool vfio_notifier_init(VFIOPCIDevice *vdev, EventNotifier *e, const char *name, int nr, Error **errp) { - int ret = event_notifier_init(e, 0); + int fd, ret; + fd = vfio_cpr_load_vector_fd(vdev, name, nr); + if (fd >= 0) { + event_notifier_init_fd(e, fd); + return true; + } + + ret = event_notifier_init(e, 0); if (ret) { error_setg_errno(errp, -ret, "vfio_notifier_init %s failed", name); + return false; } - return !ret; + + fd = event_notifier_get_fd(e); + vfio_cpr_save_vector_fd(vdev, name, nr, fd); + return true; } static void vfio_notifier_cleanup(VFIOPCIDevice *vdev, EventNotifier *e, const char *name, int nr) { + vfio_cpr_delete_vector_fd(vdev, name, nr); event_notifier_cleanup(e); } @@ -196,6 +210,36 @@ fail: #endif } +static bool vfio_cpr_intx_enable_kvm(VFIOPCIDevice *vdev, Error **errp) +{ +#ifdef CONFIG_KVM + if (vdev->no_kvm_intx || !kvm_irqfds_enabled() || + vdev->intx.route.mode != PCI_INTX_ENABLED || + !kvm_resamplefds_enabled()) { + return true; + } + + if (!vfio_notifier_init(vdev, &vdev->intx.unmask, "intx-unmask", 0, errp)) { + return false; + } + + if (kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, + &vdev->intx.interrupt, + &vdev->intx.unmask, + vdev->intx.route.irq)) { + 
error_setg_errno(errp, errno, "failed to setup resample irqfd"); + vfio_notifier_cleanup(vdev, &vdev->intx.unmask, "intx-unmask", 0); + return false; + } + + vdev->intx.kvm_accel = true; + trace_vfio_intx_enable_kvm(vdev->vbasedev.name); + return true; +#else + return true; +#endif +} + static void vfio_intx_disable_kvm(VFIOPCIDevice *vdev) { #ifdef CONFIG_KVM @@ -291,7 +335,13 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp) return true; } - vfio_disable_interrupts(vdev); + /* + * Do not alter interrupt state during vfio_realize and cpr load. + * The incoming state is cleared thereafter. + */ + if (!cpr_is_incoming()) { + vfio_disable_interrupts(vdev); + } vdev->intx.pin = pin - 1; /* Pin A (1) -> irq[0] */ pci_config_set_interrupt_pin(vdev->pdev.config, pin); @@ -314,6 +364,14 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp) fd = event_notifier_get_fd(&vdev->intx.interrupt); qemu_set_fd_handler(fd, vfio_intx_interrupt, NULL, vdev); + + if (cpr_is_incoming()) { + if (!vfio_cpr_intx_enable_kvm(vdev, &err)) { + warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); + } + goto skip_signaling; + } + if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX, 0, VFIO_IRQ_SET_ACTION_TRIGGER, fd, errp)) { qemu_set_fd_handler(fd, NULL, NULL, vdev); @@ -325,6 +383,7 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp) warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); } +skip_signaling: vdev->interrupt = VFIO_INT_INTx; trace_vfio_intx_enable(vdev->vbasedev.name); @@ -394,6 +453,14 @@ static void vfio_msi_interrupt(void *opaque) notify(&vdev->pdev, nr); } +void vfio_pci_msi_set_handler(VFIOPCIDevice *vdev, int nr) +{ + VFIOMSIVector *vector = &vdev->msi_vectors[nr]; + int fd = event_notifier_get_fd(&vector->interrupt); + + qemu_set_fd_handler(fd, vfio_msi_interrupt, NULL, vector); +} + /* * Get MSI-X enabled, but no vector enabled, by setting vector 0 with an invalid * fd to kernel. 
@@ -656,6 +723,15 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, static int vfio_msix_vector_use(PCIDevice *pdev, unsigned int nr, MSIMessage msg) { + /* + * Ignore the callback from msix_set_vector_notifiers during resume. + * The necessary subset of these actions is called from + * vfio_cpr_claim_vectors during post load. + */ + if (cpr_is_incoming()) { + return 0; + } + return vfio_msix_vector_do_use(pdev, nr, &msg, vfio_msi_interrupt); } @@ -686,6 +762,12 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr) } } +void vfio_pci_msix_set_notifiers(VFIOPCIDevice *vdev) +{ + msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use, + vfio_msix_vector_release, NULL); +} + void vfio_pci_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev) { assert(!vdev->defer_kvm_irq_routing); @@ -2914,7 +2996,7 @@ void vfio_pci_put_device(VFIOPCIDevice *vdev) vfio_device_detach(&vdev->vbasedev); - g_free(vdev->vbasedev.name); + vfio_device_free_name(&vdev->vbasedev); g_free(vdev->msix); } @@ -2965,6 +3047,11 @@ void vfio_pci_register_err_notifier(VFIOPCIDevice *vdev) fd = event_notifier_get_fd(&vdev->err_notifier); qemu_set_fd_handler(fd, vfio_err_notifier_handler, NULL, vdev); + /* Do not alter irq_signaling during vfio_realize for cpr */ + if (cpr_is_incoming()) { + return; + } + if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_ERR_IRQ_INDEX, 0, VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) { error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); @@ -3032,6 +3119,12 @@ void vfio_pci_register_req_notifier(VFIOPCIDevice *vdev) fd = event_notifier_get_fd(&vdev->req_notifier); qemu_set_fd_handler(fd, vfio_req_notifier_handler, NULL, vdev); + /* Do not alter irq_signaling during vfio_realize for cpr */ + if (cpr_is_incoming()) { + vdev->req_enabled = true; + return; + } + if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_REQ_IRQ_INDEX, 0, VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) { error_reportf_err(err, VFIO_MSG_PREFIX, 
vdev->vbasedev.name); @@ -3189,7 +3282,13 @@ bool vfio_pci_interrupt_setup(VFIOPCIDevice *vdev, Error **errp) vfio_intx_routing_notifier); vdev->irqchip_change_notifier.notify = vfio_irqchip_change; kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier); - if (!vfio_intx_enable(vdev, errp)) { + + /* + * During CPR, do not call vfio_intx_enable at this time. Instead, + * call it from vfio_pci_post_load after the intx routing data has + * been loaded from vmstate. + */ + if (!cpr_is_incoming() && !vfio_intx_enable(vdev, errp)) { timer_free(vdev->intx.mmap_timer); pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index 5ba7330..495fae7 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -218,6 +218,8 @@ void vfio_pci_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, void vfio_pci_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev); void vfio_pci_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev); bool vfio_pci_intx_enable(VFIOPCIDevice *vdev, Error **errp); +void vfio_pci_msix_set_notifiers(VFIOPCIDevice *vdev); +void vfio_pci_msi_set_handler(VFIOPCIDevice *vdev, int nr); uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len); void vfio_pci_write_config(PCIDevice *pdev, diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c index 9a21f2e..5c1795a 100644 --- a/hw/vfio/platform.c +++ b/hw/vfio/platform.c @@ -530,7 +530,7 @@ static bool vfio_base_device_init(VFIODevice *vbasedev, Error **errp) { /* @fd takes precedence over @sysfsdev which takes precedence over @host */ if (vbasedev->fd < 0 && vbasedev->sysfsdev) { - g_free(vbasedev->name); + vfio_device_free_name(vbasedev); vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); } else if (vbasedev->fd < 0) { if (!vbasedev->name || strchr(vbasedev->name, '/')) { diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index e1728c4..8ec0ad0 100644 --- a/hw/vfio/trace-events +++ 
b/hw/vfio/trace-events @@ -197,6 +197,9 @@ iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new IOMMUFD con iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d" iommufd_cdev_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int dev_id) "\t%04x:%02x:%02x.%x devid %d" +# cpr-iommufd.c +vfio_cpr_find_device(uint32_t ioas_id, int devid, uint32_t hwpt_id) "ioas_id %u, devid %d, hwpt_id %u" + # device.c vfio_device_get_region_info_type(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%08x" vfio_device_reset_handler(void) "" diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index a684855..9b658a3 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -85,6 +85,7 @@ void qemu_ram_unset_idstr(RAMBlock *block); const char *qemu_ram_get_idstr(RAMBlock *rb); void *qemu_ram_get_host_addr(RAMBlock *rb); ram_addr_t qemu_ram_get_offset(RAMBlock *rb); +ram_addr_t qemu_ram_get_fd_offset(RAMBlock *rb); ram_addr_t qemu_ram_get_used_length(RAMBlock *rb); ram_addr_t qemu_ram_get_max_length(RAMBlock *rb); bool qemu_ram_is_shared(RAMBlock *rb); diff --git a/include/hw/arm/aspeed.h b/include/hw/arm/aspeed.h index 973277b..6c36455 100644 --- a/include/hw/arm/aspeed.h +++ b/include/hw/arm/aspeed.h @@ -35,7 +35,9 @@ struct AspeedMachineClass { uint32_t hw_strap2; const char *fmc_model; const char *spi_model; + const char *spi2_model; uint32_t num_cs; + uint32_t num_cs2; uint32_t macs_mask; void (*i2c_init)(AspeedMachineState *bmc); uint32_t uart_default; diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index 162a56a..5eaf41a 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -442,6 +442,7 @@ struct qemu_work_item; * @opaque: User data. * @mem_io_pc: Host Program Counter at which the memory was accessed. * @accel: Pointer to accelerator specific state. 
+ * @vcpu_dirty: Hardware accelerator is not synchronized with QEMU state * @kvm_fd: vCPU file descriptor for KVM. * @work_mutex: Lock to prevent multiple access to @work_list. * @work_list: List of pending asynchronous work. @@ -538,7 +539,6 @@ struct CPUState { uint32_t kvm_fetch_index; uint64_t dirty_pages; int kvm_vcpu_stats_fd; - bool vcpu_dirty; /* Use by accel-block: CPU is executing an ioctl() */ QemuLockCnt in_ioctl_lock; @@ -554,6 +554,7 @@ struct CPUState { uint32_t halted; int32_t exception_index; + bool vcpu_dirty; AccelCPUState *accel; /* Used to keep track of an outstanding cpu throttle thread for migration diff --git a/include/hw/riscv/xiangshan_kmh.h b/include/hw/riscv/xiangshan_kmh.h new file mode 100644 index 0000000..c5dc6b1 --- /dev/null +++ b/include/hw/riscv/xiangshan_kmh.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: BSD-2-Clause */ +/* + * QEMU RISC-V Board Compatible with the Xiangshan Kunminghu + * FPGA prototype platform + * + * Copyright (c) 2025 Beijing Institute of Open Source Chip (BOSC) + * + */ + +#ifndef HW_XIANGSHAN_KMH_H +#define HW_XIANGSHAN_KMH_H + +#include "hw/boards.h" +#include "hw/riscv/riscv_hart.h" + +#define XIANGSHAN_KMH_MAX_CPUS 16 + +typedef struct XiangshanKmhSoCState { + /*< private >*/ + DeviceState parent_obj; + + /*< public >*/ + RISCVHartArrayState cpus; + DeviceState *irqchip; + MemoryRegion rom; +} XiangshanKmhSoCState; + +#define TYPE_XIANGSHAN_KMH_SOC "xiangshan.kunminghu.soc" +DECLARE_INSTANCE_CHECKER(XiangshanKmhSoCState, XIANGSHAN_KMH_SOC, + TYPE_XIANGSHAN_KMH_SOC) + +typedef struct XiangshanKmhState { + /*< private >*/ + MachineState parent_obj; + + /*< public >*/ + XiangshanKmhSoCState soc; +} XiangshanKmhState; + +#define TYPE_XIANGSHAN_KMH_MACHINE MACHINE_TYPE_NAME("xiangshan-kunminghu") +DECLARE_INSTANCE_CHECKER(XiangshanKmhState, XIANGSHAN_KMH_MACHINE, + TYPE_XIANGSHAN_KMH_MACHINE) + +enum { + XIANGSHAN_KMH_ROM, + XIANGSHAN_KMH_UART0, + XIANGSHAN_KMH_CLINT, + XIANGSHAN_KMH_APLIC_M, + 
XIANGSHAN_KMH_APLIC_S, + XIANGSHAN_KMH_IMSIC_M, + XIANGSHAN_KMH_IMSIC_S, + XIANGSHAN_KMH_DRAM, +}; + +enum { + XIANGSHAN_KMH_UART0_IRQ = 10, +}; + +/* Indicating Timebase-freq (1MHZ) */ +#define XIANGSHAN_KMH_CLINT_TIMEBASE_FREQ 1000000 + +#define XIANGSHAN_KMH_IMSIC_NUM_IDS 255 +#define XIANGSHAN_KMH_IMSIC_NUM_GUESTS 7 +#define XIANGSHAN_KMH_IMSIC_GUEST_BITS 3 + +#define XIANGSHAN_KMH_APLIC_NUM_SOURCES 96 + +#endif diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h index 3cd86ec..bded6e9 100644 --- a/include/hw/vfio/vfio-container-base.h +++ b/include/hw/vfio/vfio-container-base.h @@ -168,6 +168,21 @@ struct VFIOIOMMUClass { hwaddr iova, ram_addr_t size, void *vaddr, bool readonly, MemoryRegion *mr); /** + * @dma_map_file + * + * Map a file range for the container. + * + * @bcontainer: #VFIOContainerBase to use for map + * @iova: start address to map + * @size: size of the range to map + * @fd: descriptor of the file to map + * @start: starting file offset of the range to map + * @readonly: map read only if true + */ + int (*dma_map_file)(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + int fd, unsigned long start, bool readonly); + /** * @dma_unmap * * Unmap an address range from the container. 
diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h index 8bf85b9..80ad20d 100644 --- a/include/hw/vfio/vfio-cpr.h +++ b/include/hw/vfio/vfio-cpr.h @@ -15,19 +15,27 @@ struct VFIOContainer; struct VFIOContainerBase; struct VFIOGroup; +struct VFIODevice; +struct VFIOPCIDevice; +struct VFIOIOMMUFDContainer; +struct IOMMUFDBackend; + +typedef int (*dma_map_fn)(const struct VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, void *vaddr, + bool readonly, MemoryRegion *mr); typedef struct VFIOContainerCPR { Error *blocker; bool vaddr_unmapped; NotifierWithReturn transfer_notifier; MemoryListener remap_listener; - int (*saved_dma_map)(const struct VFIOContainerBase *bcontainer, - hwaddr iova, ram_addr_t size, - void *vaddr, bool readonly, MemoryRegion *mr); } VFIOContainerCPR; typedef struct VFIODeviceCPR { Error *mdev_blocker; + Error *id_blocker; + uint32_t hwpt_id; + uint32_t ioas_id; } VFIODeviceCPR; bool vfio_legacy_cpr_register_container(struct VFIOContainer *container, @@ -37,9 +45,15 @@ void vfio_legacy_cpr_unregister_container(struct VFIOContainer *container); int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier, MigrationEvent *e, Error **errp); -bool vfio_cpr_register_container(struct VFIOContainerBase *bcontainer, - Error **errp); -void vfio_cpr_unregister_container(struct VFIOContainerBase *bcontainer); +bool vfio_iommufd_cpr_register_container(struct VFIOIOMMUFDContainer *container, + Error **errp); +void vfio_iommufd_cpr_unregister_container( + struct VFIOIOMMUFDContainer *container); +bool vfio_iommufd_cpr_register_iommufd(struct IOMMUFDBackend *be, Error **errp); +void vfio_iommufd_cpr_unregister_iommufd(struct IOMMUFDBackend *be); +void vfio_iommufd_cpr_register_device(struct VFIODevice *vbasedev); +void vfio_iommufd_cpr_unregister_device(struct VFIODevice *vbasedev); +void vfio_cpr_load_device(struct VFIODevice *vbasedev); int vfio_cpr_group_get_device_fd(int d, const char *name); @@ -52,6 +66,16 @@ void 
vfio_cpr_giommu_remap(struct VFIOContainerBase *bcontainer, bool vfio_cpr_ram_discard_register_listener( struct VFIOContainerBase *bcontainer, MemoryRegionSection *section); +void vfio_cpr_save_vector_fd(struct VFIOPCIDevice *vdev, const char *name, + int nr, int fd); +int vfio_cpr_load_vector_fd(struct VFIOPCIDevice *vdev, const char *name, + int nr); +void vfio_cpr_delete_vector_fd(struct VFIOPCIDevice *vdev, const char *name, + int nr); + extern const VMStateDescription vfio_cpr_pci_vmstate; +extern const VMStateDescription vmstate_cpr_vfio_devices; + +void vfio_cpr_add_kvm_notifier(void); #endif /* HW_VFIO_VFIO_CPR_H */ diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h index c616652..1901a35 100644 --- a/include/hw/vfio/vfio-device.h +++ b/include/hw/vfio/vfio-device.h @@ -279,8 +279,11 @@ int vfio_device_get_irq_info(VFIODevice *vbasedev, int index, /* Returns 0 on success, or a negative errno. */ bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp); +void vfio_device_free_name(VFIODevice *vbasedev); void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp); void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops, DeviceState *dev, bool ram_discard); int vfio_device_get_aw_bits(VFIODevice *vdev); + +void vfio_kvm_device_close(void); #endif /* HW_VFIO_VFIO_COMMON_H */ diff --git a/include/migration/cpr.h b/include/migration/cpr.h index 07858e9..3fc19a7 100644 --- a/include/migration/cpr.h +++ b/include/migration/cpr.h @@ -9,11 +9,23 @@ #define MIGRATION_CPR_H #include "qapi/qapi-types-migration.h" +#include "qemu/queue.h" #define MIG_MODE_NONE -1 #define QEMU_CPR_FILE_MAGIC 0x51435052 #define QEMU_CPR_FILE_VERSION 0x00000001 +#define CPR_STATE "CprState" + +typedef QLIST_HEAD(CprFdList, CprFd) CprFdList; +typedef QLIST_HEAD(CprVFIODeviceList, CprVFIODevice) CprVFIODeviceList; + +typedef struct CprState { + CprFdList fds; + CprVFIODeviceList vfio_devices; +} CprState; + +extern CprState 
cpr_state; void cpr_save_fd(const char *name, int id, int fd); void cpr_delete_fd(const char *name, int id); @@ -32,6 +44,8 @@ void cpr_state_close(void); struct QIOChannel *cpr_state_ioc(void); bool cpr_incoming_needed(void *opaque); +int cpr_get_fd_param(const char *name, const char *fdname, int index, + Error **errp); QEMUFile *cpr_transfer_output(MigrationChannel *channel, Error **errp); QEMUFile *cpr_transfer_input(MigrationChannel *channel, Error **errp); diff --git a/include/qemu/accel.h b/include/qemu/accel.h index fbd3d89..9e821d0 100644 --- a/include/qemu/accel.h +++ b/include/qemu/accel.h @@ -37,17 +37,21 @@ typedef struct AccelClass { /*< public >*/ const char *name; - int (*init_machine)(MachineState *ms); + /* Cached by accel_init_ops_interfaces() when created */ + AccelOpsClass *ops; + + int (*init_machine)(AccelState *as, MachineState *ms); bool (*cpu_common_realize)(CPUState *cpu, Error **errp); void (*cpu_common_unrealize)(CPUState *cpu); /* system related hooks */ - void (*setup_post)(MachineState *ms, AccelState *accel); - bool (*has_memory)(MachineState *ms, AddressSpace *as, + void (*setup_post)(AccelState *as); + void (*pre_resume_vm)(AccelState *as, bool step_pending); + bool (*has_memory)(AccelState *accel, AddressSpace *as, hwaddr start_addr, hwaddr size); /* gdbstub related hooks */ - int (*gdbstub_supported_sstep_flags)(void); + int (*gdbstub_supported_sstep_flags)(AccelState *as); bool *allowed; /* @@ -83,6 +87,8 @@ int accel_init_machine(AccelState *accel, MachineState *ms); /* Called just before os_setup_post (ie just before drop OS privs) */ void accel_setup_post(MachineState *ms); +void accel_pre_resume(MachineState *ms, bool step_pending); + /** * accel_cpu_instance_init: * @cpu: The CPU that needs to do accel-specific object initializations. 
diff --git a/include/system/accel-ops.h b/include/system/accel-ops.h index 4c99d25..bf73835 100644 --- a/include/system/accel-ops.h +++ b/include/system/accel-ops.h @@ -10,6 +10,7 @@ #ifndef ACCEL_OPS_H #define ACCEL_OPS_H +#include "qemu/accel.h" #include "exec/vaddr.h" #include "qom/object.h" @@ -31,7 +32,7 @@ struct AccelOpsClass { /*< public >*/ /* initialization function called when accel is chosen */ - void (*ops_init)(AccelOpsClass *ops); + void (*ops_init)(AccelClass *ac); bool (*cpus_are_resettable)(void); void (*cpu_reset_hold)(CPUState *cpu); @@ -40,12 +41,28 @@ struct AccelOpsClass { void (*kick_vcpu_thread)(CPUState *cpu); bool (*cpu_thread_is_idle)(CPUState *cpu); + /** + * synchronize_post_reset: + * synchronize_post_init: + * @cpu: The vCPU to synchronize. + * + * Request to synchronize QEMU vCPU registers to the hardware accelerator + * (QEMU is the reference). + */ void (*synchronize_post_reset)(CPUState *cpu); void (*synchronize_post_init)(CPUState *cpu); + /** + * synchronize_state: + * synchronize_pre_loadvm: + * @cpu: The vCPU to synchronize. + * + * Request to synchronize QEMU vCPU registers from the hardware accelerator + * (the hardware accelerator is the reference). + */ void (*synchronize_state)(CPUState *cpu); void (*synchronize_pre_loadvm)(CPUState *cpu); - void (*synchronize_pre_resume)(bool step_pending); + /* handle_interrupt is mandatory. 
*/ void (*handle_interrupt)(CPUState *cpu, int mask); /** @@ -70,4 +87,6 @@ struct AccelOpsClass { void (*remove_all_breakpoints)(CPUState *cpu); }; +void generic_handle_interrupt(CPUState *cpu, int mask); + #endif /* ACCEL_OPS_H */ diff --git a/include/system/cpus.h b/include/system/cpus.h index 3226c76..69be6a7 100644 --- a/include/system/cpus.h +++ b/include/system/cpus.h @@ -7,11 +7,6 @@ void cpus_register_accel(const AccelOpsClass *i); /* return registers ops */ const AccelOpsClass *cpus_get_accel(void); -/* accel/dummy-cpus.c */ - -/* Create a dummy vcpu for AccelOpsClass->create_vcpu_thread */ -void dummy_start_vcpu_thread(CPUState *); - /* interface available for cpus accelerator threads */ /* For temporary buffers for forming a name */ diff --git a/include/system/hvf.h b/include/system/hvf.h index a9a502f..d3dcf08 100644 --- a/include/system/hvf.h +++ b/include/system/hvf.h @@ -14,10 +14,6 @@ #define HVF_H #include "qemu/accel.h" -#include "qemu/queue.h" -#include "exec/vaddr.h" -#include "qom/object.h" -#include "exec/vaddr.h" #ifdef COMPILING_PER_TARGET # ifdef CONFIG_HVF @@ -40,38 +36,4 @@ typedef struct HVFState HVFState; DECLARE_INSTANCE_CHECKER(HVFState, HVF_STATE, TYPE_HVF_ACCEL) -#ifdef COMPILING_PER_TARGET -struct hvf_sw_breakpoint { - vaddr pc; - vaddr saved_insn; - int use_count; - QTAILQ_ENTRY(hvf_sw_breakpoint) entry; -}; - -struct hvf_sw_breakpoint *hvf_find_sw_breakpoint(CPUState *cpu, - vaddr pc); -int hvf_sw_breakpoints_active(CPUState *cpu); - -int hvf_arch_insert_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp); -int hvf_arch_remove_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp); -int hvf_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type); -int hvf_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type); -void hvf_arch_remove_all_hw_breakpoints(void); - -/* - * hvf_update_guest_debug: - * @cs: CPUState for the CPU to update - * - * Update guest to enable or disable debugging. 
Per-arch specifics will be - * handled by calling down to hvf_arch_update_guest_debug. - */ -int hvf_update_guest_debug(CPUState *cpu); -void hvf_arch_update_guest_debug(CPUState *cpu); - -/* - * Return whether the guest supports debugging. - */ -bool hvf_arch_supports_guest_debug(void); -#endif /* COMPILING_PER_TARGET */ - #endif diff --git a/include/system/hvf_int.h b/include/system/hvf_int.h index d774e58..5150c7d 100644 --- a/include/system/hvf_int.h +++ b/include/system/hvf_int.h @@ -12,6 +12,8 @@ #define HVF_INT_H #include "qemu/queue.h" +#include "exec/vaddr.h" +#include "qom/object.h" #ifdef __aarch64__ #include <Hypervisor/Hypervisor.h> @@ -60,7 +62,6 @@ struct AccelCPUState { bool vtimer_masked; sigset_t unblock_ipi_mask; bool guest_debug_enabled; - bool dirty; }; void assert_hvf_ok_impl(hv_return_t ret, const char *file, unsigned int line, @@ -77,4 +78,36 @@ int hvf_put_registers(CPUState *); int hvf_get_registers(CPUState *); void hvf_kick_vcpu_thread(CPUState *cpu); +struct hvf_sw_breakpoint { + vaddr pc; + vaddr saved_insn; + int use_count; + QTAILQ_ENTRY(hvf_sw_breakpoint) entry; +}; + +struct hvf_sw_breakpoint *hvf_find_sw_breakpoint(CPUState *cpu, + vaddr pc); +int hvf_sw_breakpoints_active(CPUState *cpu); + +int hvf_arch_insert_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp); +int hvf_arch_remove_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp); +int hvf_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type); +int hvf_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type); +void hvf_arch_remove_all_hw_breakpoints(void); + +/* + * hvf_update_guest_debug: + * @cs: CPUState for the CPU to update + * + * Update guest to enable or disable debugging. Per-arch specifics will be + * handled by calling down to hvf_arch_update_guest_debug. + */ +int hvf_update_guest_debug(CPUState *cpu); +void hvf_arch_update_guest_debug(CPUState *cpu); + +/* + * Return whether the guest supports debugging. 
+ */ +bool hvf_arch_supports_guest_debug(void); + #endif diff --git a/include/system/hw_accel.h b/include/system/hw_accel.h index 380e9e6..fa9228d 100644 --- a/include/system/hw_accel.h +++ b/include/system/hw_accel.h @@ -17,9 +17,26 @@ #include "system/whpx.h" #include "system/nvmm.h" +/** + * cpu_synchronize_state: + * cpu_synchronize_pre_loadvm: + * @cpu: The vCPU to synchronize. + * + * Request to synchronize QEMU vCPU registers from the hardware accelerator + * (the hardware accelerator is the reference). + */ void cpu_synchronize_state(CPUState *cpu); +void cpu_synchronize_pre_loadvm(CPUState *cpu); + +/** + * cpu_synchronize_post_reset: + * cpu_synchronize_post_init: + * @cpu: The vCPU to synchronize. + * + * Request to synchronize QEMU vCPU registers to the hardware accelerator + * (QEMU is the reference). + */ void cpu_synchronize_post_reset(CPUState *cpu); void cpu_synchronize_post_init(CPUState *cpu); -void cpu_synchronize_pre_loadvm(CPUState *cpu); #endif /* QEMU_HW_ACCEL_H */ diff --git a/include/system/iommufd.h b/include/system/iommufd.h index 283861b..c9c72ff 100644 --- a/include/system/iommufd.h +++ b/include/system/iommufd.h @@ -32,6 +32,7 @@ struct IOMMUFDBackend { /*< protected >*/ int fd; /* /dev/iommu file descriptor */ bool owned; /* is the /dev/iommu opened internally */ + Error *cpr_blocker;/* set if be does not support CPR */ uint32_t users; /*< public >*/ @@ -43,6 +44,9 @@ void iommufd_backend_disconnect(IOMMUFDBackend *be); bool iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, Error **errp); void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id); +int iommufd_backend_map_file_dma(IOMMUFDBackend *be, uint32_t ioas_id, + hwaddr iova, ram_addr_t size, int fd, + unsigned long start, bool readonly); int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, ram_addr_t size, void *vaddr, bool readonly); int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, @@ -66,6 +70,9 @@ bool 
iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t id, uint32_t *entry_num, void *data, Error **errp); +bool iommufd_change_process_capable(IOMMUFDBackend *be); +bool iommufd_change_process(IOMMUFDBackend *be, Error **errp); + #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd" OBJECT_DECLARE_TYPE(HostIOMMUDeviceIOMMUFD, HostIOMMUDeviceIOMMUFDClass, HOST_IOMMU_DEVICE_IOMMUFD) diff --git a/include/system/kvm.h b/include/system/kvm.h index 7cc60d2..3c7d314 100644 --- a/include/system/kvm.h +++ b/include/system/kvm.h @@ -195,6 +195,7 @@ bool kvm_has_sync_mmu(void); int kvm_has_vcpu_events(void); int kvm_max_nested_state_length(void); int kvm_has_gsi_routing(void); +void kvm_close(void); /** * kvm_arm_supports_user_irq @@ -317,14 +318,6 @@ int kvm_create_device(KVMState *s, uint64_t type, bool test); bool kvm_device_supported(int vmfd, uint64_t type); /** - * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU - * @cpu: QOM CPUState object for which KVM vCPU has to be fetched/created. - * - * @returns: 0 when success, errno (<0) when failed. - */ -int kvm_create_vcpu(CPUState *cpu); - -/** * kvm_park_vcpu - Park QEMU KVM vCPU context * @cpu: QOM CPUState object for which QEMU KVM vCPU context has to be parked. 
* diff --git a/include/system/nvmm.h b/include/system/nvmm.h index 6971ddb..7390def 100644 --- a/include/system/nvmm.h +++ b/include/system/nvmm.h @@ -13,17 +13,18 @@ #define QEMU_NVMM_H #ifdef COMPILING_PER_TARGET - -#ifdef CONFIG_NVMM - -int nvmm_enabled(void); - -#else /* CONFIG_NVMM */ - -#define nvmm_enabled() (0) - -#endif /* CONFIG_NVMM */ - +# ifdef CONFIG_NVMM +# define CONFIG_NVMM_IS_POSSIBLE +# endif /* !CONFIG_NVMM */ +#else +# define CONFIG_NVMM_IS_POSSIBLE #endif /* COMPILING_PER_TARGET */ +#ifdef CONFIG_NVMM_IS_POSSIBLE +extern bool nvmm_allowed; +#define nvmm_enabled() (nvmm_allowed) +#else /* !CONFIG_NVMM_IS_POSSIBLE */ +#define nvmm_enabled() 0 +#endif /* !CONFIG_NVMM_IS_POSSIBLE */ + #endif /* QEMU_NVMM_H */ diff --git a/include/system/whpx.h b/include/system/whpx.h index 00ff409..00f6a3e 100644 --- a/include/system/whpx.h +++ b/include/system/whpx.h @@ -16,19 +16,20 @@ #define QEMU_WHPX_H #ifdef COMPILING_PER_TARGET +# ifdef CONFIG_WHPX +# define CONFIG_WHPX_IS_POSSIBLE +# endif /* !CONFIG_WHPX */ +#else +# define CONFIG_WHPX_IS_POSSIBLE +#endif /* COMPILING_PER_TARGET */ -#ifdef CONFIG_WHPX - -int whpx_enabled(void); +#ifdef CONFIG_WHPX_IS_POSSIBLE +extern bool whpx_allowed; +#define whpx_enabled() (whpx_allowed) bool whpx_apic_in_platform(void); - -#else /* CONFIG_WHPX */ - -#define whpx_enabled() (0) +#else /* !CONFIG_WHPX_IS_POSSIBLE */ +#define whpx_enabled() 0 #define whpx_apic_in_platform() (0) - -#endif /* CONFIG_WHPX */ - -#endif /* COMPILING_PER_TARGET */ +#endif /* !CONFIG_WHPX_IS_POSSIBLE */ #endif /* QEMU_WHPX_H */ diff --git a/linux-user/main.c b/linux-user/main.c index 5ac5b55..a9142ee 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -820,7 +820,7 @@ int main(int argc, char **argv, char **envp) opt_one_insn_per_tb, &error_abort); object_property_set_int(OBJECT(accel), "tb-size", opt_tb_size, &error_abort); - ac->init_machine(NULL); + ac->init_machine(accel, NULL); } /* diff --git a/migration/cpr.c b/migration/cpr.c index 
a50a57e..42ad0b0 100644 --- a/migration/cpr.c +++ b/migration/cpr.c @@ -7,25 +7,21 @@ #include "qemu/osdep.h" #include "qapi/error.h" +#include "hw/vfio/vfio-device.h" #include "migration/cpr.h" #include "migration/misc.h" #include "migration/options.h" #include "migration/qemu-file.h" #include "migration/savevm.h" #include "migration/vmstate.h" +#include "monitor/monitor.h" #include "system/runstate.h" #include "trace.h" /*************************************************************************/ /* cpr state container for all information to be saved. */ -typedef QLIST_HEAD(CprFdList, CprFd) CprFdList; - -typedef struct CprState { - CprFdList fds; -} CprState; - -static CprState cpr_state; +CprState cpr_state; /****************************************************************************/ @@ -126,8 +122,6 @@ int cpr_open_fd(const char *path, int flags, const char *name, int id, } /*************************************************************************/ -#define CPR_STATE "CprState" - static const VMStateDescription vmstate_cpr_state = { .name = CPR_STATE, .version_id = 1, @@ -135,6 +129,10 @@ static const VMStateDescription vmstate_cpr_state = { .fields = (VMStateField[]) { VMSTATE_QLIST_V(fds, CprState, 1, vmstate_cpr_fd, CprFd, next), VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription * const []) { + &vmstate_cpr_vfio_devices, + NULL } }; /*************************************************************************/ @@ -264,3 +262,39 @@ bool cpr_incoming_needed(void *opaque) MigMode mode = migrate_mode(); return mode == MIG_MODE_CPR_TRANSFER; } + +/* + * cpr_get_fd_param: find a descriptor and return its value. + * + * @name: CPR name for the descriptor + * @fdname: An integer-valued string, or a name passed to a getfd command + * @index: CPR index of the descriptor + * @errp: returned error message + * + * If CPR is not being performed, then use @fdname to find the fd. 
+ * If CPR is being performed, then ignore @fdname, and look for @name + * and @index in CPR state. + * + * On success returns the fd value, else returns -1. + */ +int cpr_get_fd_param(const char *name, const char *fdname, int index, + Error **errp) +{ + ERRP_GUARD(); + int fd; + + if (cpr_is_incoming()) { + fd = cpr_find_fd(name, index); + if (fd < 0) { + error_setg(errp, "cannot find saved value for fd %s", fdname); + } + } else { + fd = monitor_fd_param(monitor_cur(), fdname, errp); + if (fd >= 0) { + cpr_save_fd(name, index, fd); + } else { + error_prepend(errp, "Could not parse object fd %s:", fdname); + } + } + return fd; +} diff --git a/monitor/hmp-cmds-target.c b/monitor/hmp-cmds-target.c index 8eaf70d..e982061 100644 --- a/monitor/hmp-cmds-target.c +++ b/monitor/hmp-cmds-target.c @@ -102,7 +102,7 @@ void hmp_info_registers(Monitor *mon, const QDict *qdict) if (all_cpus) { CPU_FOREACH(cs) { monitor_printf(mon, "\nCPU#%d\n", cs->cpu_index); - cpu_dump_state(cs, NULL, CPU_DUMP_FPU); + cpu_dump_state(cs, NULL, CPU_DUMP_FPU | CPU_DUMP_VPU); } } else { cs = vcpu >= 0 ? qemu_get_cpu(vcpu) : mon_get_cpu(mon); @@ -117,7 +117,7 @@ void hmp_info_registers(Monitor *mon, const QDict *qdict) } monitor_printf(mon, "\nCPU#%d\n", cs->cpu_index); - cpu_dump_state(cs, NULL, CPU_DUMP_FPU); + cpu_dump_state(cs, NULL, CPU_DUMP_FPU | CPU_DUMP_VPU); } } diff --git a/qapi/machine.json b/qapi/machine.json index 0650b8d..f712e7d 100644 --- a/qapi/machine.json +++ b/qapi/machine.json @@ -1762,24 +1762,6 @@ 'features': [ 'unstable' ] } ## -# @x-query-opcount: -# -# Query TCG opcode counters -# -# Features: -# -# @unstable: This command is meant for debugging. 
-# -# Returns: TCG opcode counters -# -# Since: 6.2 -## -{ 'command': 'x-query-opcount', - 'returns': 'HumanReadableText', - 'if': 'CONFIG_TCG', - 'features': [ 'unstable' ] } - -## # @x-query-ramblock: # # Query system ramblock information diff --git a/qapi/migration.json b/qapi/migration.json index 4963f6c..e8a7d3b 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -620,8 +620,10 @@ # # @cpr-transfer: This mode allows the user to transfer a guest to a # new QEMU instance on the same host with minimal guest pause -# time by preserving guest RAM in place. Devices and their pinned -# pages will also be preserved in a future QEMU release. +# time by preserving guest RAM in place. +# +# Devices and their pinned pages are also preserved for VFIO and +# IOMMUFD. (since 10.1) # # The user starts new QEMU on the same host as old QEMU, with # command-line arguments to create the same machine, plus the diff --git a/system/cpus.c b/system/cpus.c index d16b0df..8e6da2e 100644 --- a/system/cpus.c +++ b/system/cpus.c @@ -254,7 +254,7 @@ int64_t cpus_get_elapsed_ticks(void) return cpu_get_ticks(); } -static void generic_handle_interrupt(CPUState *cpu, int mask) +void generic_handle_interrupt(CPUState *cpu, int mask) { cpu->interrupt_request |= mask; @@ -265,11 +265,9 @@ static void generic_handle_interrupt(CPUState *cpu, int mask) void cpu_interrupt(CPUState *cpu, int mask) { - if (cpus_accel->handle_interrupt) { - cpus_accel->handle_interrupt(cpu, mask); - } else { - generic_handle_interrupt(cpu, mask); - } + g_assert(bql_locked()); + + cpus_accel->handle_interrupt(cpu, mask); } /* @@ -678,6 +676,8 @@ void cpus_register_accel(const AccelOpsClass *ops) { assert(ops != NULL); assert(ops->create_vcpu_thread != NULL); /* mandatory */ + assert(ops->handle_interrupt); + cpus_accel = ops; } @@ -768,9 +768,7 @@ int vm_prepare_start(bool step_pending) * WHPX accelerator needs to know whether we are going to step * any CPUs, before starting the first one. 
*/ - if (cpus_accel->synchronize_pre_resume) { - cpus_accel->synchronize_pre_resume(step_pending); - } + accel_pre_resume(MACHINE(qdev_get_machine()), step_pending); /* We are sending this now, but the CPUs will be resumed shortly later */ qapi_event_send_resume(); diff --git a/system/memory.c b/system/memory.c index 76b44b8..e8d9b15 100644 --- a/system/memory.c +++ b/system/memory.c @@ -3501,7 +3501,7 @@ static void mtree_print_flatview(gpointer key, gpointer value, if (fvi->ac) { for (i = 0; i < fv_address_spaces->len; ++i) { as = g_array_index(fv_address_spaces, AddressSpace*, i); - if (fvi->ac->has_memory(current_machine, as, + if (fvi->ac->has_memory(current_machine->accelerator, as, int128_get64(range->addr.start), MR_SIZE(range->addr.size) + 1)) { qemu_printf(" %s", fvi->ac->name); diff --git a/system/physmem.c b/system/physmem.c index ff0ca40..130c148 100644 --- a/system/physmem.c +++ b/system/physmem.c @@ -1593,6 +1593,11 @@ ram_addr_t qemu_ram_get_offset(RAMBlock *rb) return rb->offset; } +ram_addr_t qemu_ram_get_fd_offset(RAMBlock *rb) +{ + return rb->fd_offset; +} + ram_addr_t qemu_ram_get_used_length(RAMBlock *rb) { return rb->used_length; diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c index 7b6d291..c9cfcdc 100644 --- a/target/arm/hvf/hvf.c +++ b/target/arm/hvf/hvf.c @@ -813,9 +813,9 @@ int hvf_put_registers(CPUState *cpu) static void flush_cpu_state(CPUState *cpu) { - if (cpu->accel->dirty) { + if (cpu->vcpu_dirty) { hvf_put_registers(cpu); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } } diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 99e37a3..818b504 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -733,9 +733,9 @@ int hvf_vcpu_exec(CPUState *cpu) } do { - if (cpu->accel->dirty) { + if (cpu->vcpu_dirty) { hvf_put_registers(cpu); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } if (hvf_inject_interrupts(cpu)) { diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c index 
2057314..17fce1d 100644 --- a/target/i386/hvf/x86hvf.c +++ b/target/i386/hvf/x86hvf.c @@ -427,7 +427,7 @@ int hvf_process_events(CPUState *cs) X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; - if (!cs->accel->dirty) { + if (!cs->vcpu_dirty) { /* light weight sync for CPU_INTERRUPT_HARD and IF_MASK */ env->eflags = rreg(cs->accel->fd, HV_X86_RFLAGS); } diff --git a/target/i386/nvmm/nvmm-accel-ops.c b/target/i386/nvmm/nvmm-accel-ops.c index 2144307..a5517b0 100644 --- a/target/i386/nvmm/nvmm-accel-ops.c +++ b/target/i386/nvmm/nvmm-accel-ops.c @@ -87,6 +87,7 @@ static void nvmm_accel_ops_class_init(ObjectClass *oc, const void *data) ops->create_vcpu_thread = nvmm_start_vcpu_thread; ops->kick_vcpu_thread = nvmm_kick_vcpu_thread; + ops->handle_interrupt = generic_handle_interrupt; ops->synchronize_post_reset = nvmm_cpu_synchronize_post_reset; ops->synchronize_post_init = nvmm_cpu_synchronize_post_init; diff --git a/target/i386/nvmm/nvmm-all.c b/target/i386/nvmm/nvmm-all.c index f1c6120..b4a4d50 100644 --- a/target/i386/nvmm/nvmm-all.c +++ b/target/i386/nvmm/nvmm-all.c @@ -30,7 +30,6 @@ struct AccelCPUState { struct nvmm_vcpu vcpu; uint8_t tpr; bool stop; - bool dirty; /* Window-exiting for INTs/NMIs. */ bool int_window_exit; @@ -47,7 +46,7 @@ struct qemu_machine { /* -------------------------------------------------------------------------- */ -static bool nvmm_allowed; +bool nvmm_allowed; static struct qemu_machine qemu_mach; static struct nvmm_machine * @@ -508,7 +507,7 @@ nvmm_io_callback(struct nvmm_io *io) } /* Needed, otherwise infinite loop. */ - current_cpu->accel->dirty = false; + current_cpu->vcpu_dirty = false; } static void @@ -517,7 +516,7 @@ nvmm_mem_callback(struct nvmm_mem *mem) cpu_physical_memory_rw(mem->gpa, mem->data, mem->size, mem->write); /* Needed, otherwise infinite loop. 
*/ - current_cpu->accel->dirty = false; + current_cpu->vcpu_dirty = false; } static struct nvmm_assist_callbacks nvmm_callbacks = { @@ -727,9 +726,9 @@ nvmm_vcpu_loop(CPUState *cpu) * Inner VCPU loop. */ do { - if (cpu->accel->dirty) { + if (cpu->vcpu_dirty) { nvmm_set_registers(cpu); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } if (qcpu->stop) { @@ -827,32 +826,32 @@ static void do_nvmm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) { nvmm_get_registers(cpu); - cpu->accel->dirty = true; + cpu->vcpu_dirty = true; } static void do_nvmm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg) { nvmm_set_registers(cpu); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } static void do_nvmm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg) { nvmm_set_registers(cpu); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } static void do_nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg) { - cpu->accel->dirty = true; + cpu->vcpu_dirty = true; } void nvmm_cpu_synchronize_state(CPUState *cpu) { - if (!cpu->accel->dirty) { + if (!cpu->vcpu_dirty) { run_on_cpu(cpu, do_nvmm_cpu_synchronize_state, RUN_ON_CPU_NULL); } } @@ -982,7 +981,7 @@ nvmm_init_vcpu(CPUState *cpu) } } - qcpu->dirty = true; + qcpu->vcpu_dirty = true; cpu->accel = qcpu; return 0; @@ -1153,7 +1152,7 @@ static struct RAMBlockNotifier nvmm_ram_notifier = { /* -------------------------------------------------------------------------- */ static int -nvmm_accel_init(MachineState *ms) +nvmm_accel_init(AccelState *as, MachineState *ms) { int ret, err; @@ -1193,12 +1192,6 @@ nvmm_accel_init(MachineState *ms) return 0; } -int -nvmm_enabled(void) -{ - return nvmm_allowed; -} - static void nvmm_accel_class_init(ObjectClass *oc, const void *data) { diff --git a/target/i386/whpx/whpx-accel-ops.c b/target/i386/whpx/whpx-accel-ops.c index b8bebe4..5f4841c 100644 --- a/target/i386/whpx/whpx-accel-ops.c +++ b/target/i386/whpx/whpx-accel-ops.c @@ -90,12 +90,12 
@@ static void whpx_accel_ops_class_init(ObjectClass *oc, const void *data) ops->create_vcpu_thread = whpx_start_vcpu_thread; ops->kick_vcpu_thread = whpx_kick_vcpu_thread; ops->cpu_thread_is_idle = whpx_vcpu_thread_is_idle; + ops->handle_interrupt = generic_handle_interrupt; ops->synchronize_post_reset = whpx_cpu_synchronize_post_reset; ops->synchronize_post_init = whpx_cpu_synchronize_post_init; ops->synchronize_state = whpx_cpu_synchronize_state; ops->synchronize_pre_loadvm = whpx_cpu_synchronize_pre_loadvm; - ops->synchronize_pre_resume = whpx_cpu_synchronize_pre_resume; } static const TypeInfo whpx_accel_ops_type = { diff --git a/target/i386/whpx/whpx-accel-ops.h b/target/i386/whpx/whpx-accel-ops.h index e6cf155..54cfc25 100644 --- a/target/i386/whpx/whpx-accel-ops.h +++ b/target/i386/whpx/whpx-accel-ops.h @@ -21,7 +21,6 @@ void whpx_cpu_synchronize_state(CPUState *cpu); void whpx_cpu_synchronize_post_reset(CPUState *cpu); void whpx_cpu_synchronize_post_init(CPUState *cpu); void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu); -void whpx_cpu_synchronize_pre_resume(bool step_pending); /* state subset only touched by the VCPU itself during runtime */ #define WHPX_SET_RUNTIME_STATE 1 diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c index cf6d3e4..faf56e1 100644 --- a/target/i386/whpx/whpx-all.c +++ b/target/i386/whpx/whpx-all.c @@ -237,13 +237,12 @@ struct AccelCPUState { uint64_t tpr; uint64_t apic_base; bool interruption_pending; - bool dirty; /* Must be the last field as it may have a tail */ WHV_RUN_VP_EXIT_CONTEXT exit_ctx; }; -static bool whpx_allowed; +bool whpx_allowed; static bool whp_dispatch_initialized; static HMODULE hWinHvPlatform, hWinHvEmulation; static uint32_t max_vcpu_index; @@ -836,7 +835,7 @@ static HRESULT CALLBACK whpx_emu_setreg_callback( * The emulator just successfully wrote the register state. We clear the * dirty state so we avoid the double write on resume of the VP. 
*/ - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; return hr; } @@ -1391,7 +1390,7 @@ static int whpx_last_vcpu_stopping(CPUState *cpu) /* Returns the address of the next instruction that is about to be executed. */ static vaddr whpx_vcpu_get_pc(CPUState *cpu, bool exit_context_valid) { - if (cpu->accel->dirty) { + if (cpu->vcpu_dirty) { /* The CPU registers have been modified by other parts of QEMU. */ return cpu_env(cpu)->eip; } else if (exit_context_valid) { @@ -1704,9 +1703,9 @@ static int whpx_vcpu_run(CPUState *cpu) } do { - if (cpu->accel->dirty) { + if (cpu->vcpu_dirty) { whpx_set_registers(cpu, WHPX_SET_RUNTIME_STATE); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } if (exclusive_step_mode == WHPX_STEP_NONE) { @@ -2054,9 +2053,9 @@ static int whpx_vcpu_run(CPUState *cpu) static void do_whpx_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) { - if (!cpu->accel->dirty) { + if (!cpu->vcpu_dirty) { whpx_get_registers(cpu); - cpu->accel->dirty = true; + cpu->vcpu_dirty = true; } } @@ -2064,20 +2063,20 @@ static void do_whpx_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg) { whpx_set_registers(cpu, WHPX_SET_RESET_STATE); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } static void do_whpx_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg) { whpx_set_registers(cpu, WHPX_SET_FULL_STATE); - cpu->accel->dirty = false; + cpu->vcpu_dirty = false; } static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg) { - cpu->accel->dirty = true; + cpu->vcpu_dirty = true; } /* @@ -2086,7 +2085,7 @@ static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu, void whpx_cpu_synchronize_state(CPUState *cpu) { - if (!cpu->accel->dirty) { + if (!cpu->vcpu_dirty) { run_on_cpu(cpu, do_whpx_cpu_synchronize_state, RUN_ON_CPU_NULL); } } @@ -2106,7 +2105,7 @@ void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu) run_on_cpu(cpu, do_whpx_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL); } -void 
whpx_cpu_synchronize_pre_resume(bool step_pending) +static void whpx_pre_resume_vm(AccelState *as, bool step_pending) { whpx_global.step_pending = step_pending; } @@ -2226,7 +2225,7 @@ int whpx_init_vcpu(CPUState *cpu) } vcpu->interruptable = true; - vcpu->dirty = true; + cpu->vcpu_dirty = true; cpu->accel = vcpu; max_vcpu_index = max(max_vcpu_index, cpu->cpu_index); qemu_add_vm_change_state_handler(whpx_cpu_update_state, env); @@ -2505,7 +2504,7 @@ static void whpx_set_kernel_irqchip(Object *obj, Visitor *v, * Partition support */ -static int whpx_accel_init(MachineState *ms) +static int whpx_accel_init(AccelState *as, MachineState *ms) { struct whpx_state *whpx; int ret; @@ -2689,11 +2688,6 @@ error: return ret; } -int whpx_enabled(void) -{ - return whpx_allowed; -} - bool whpx_apic_in_platform(void) { return whpx_global.apic_in_platform; } @@ -2703,6 +2697,7 @@ static void whpx_accel_class_init(ObjectClass *oc, const void *data) AccelClass *ac = ACCEL_CLASS(oc); ac->name = "WHPX"; ac->init_machine = whpx_accel_init; + ac->pre_resume_vm = whpx_pre_resume_vm; ac->allowed = &whpx_allowed; object_class_property_add(oc, "kernel-irqchip", "on|off|split", diff --git a/target/riscv/cpu-qom.h b/target/riscv/cpu-qom.h index 1ee05eb..75f4e43 100644 --- a/target/riscv/cpu-qom.h +++ b/target/riscv/cpu-qom.h @@ -55,6 +55,7 @@ #define TYPE_RISCV_CPU_VEYRON_V1 RISCV_CPU_TYPE_NAME("veyron-v1") #define TYPE_RISCV_CPU_TT_ASCALON RISCV_CPU_TYPE_NAME("tt-ascalon") #define TYPE_RISCV_CPU_XIANGSHAN_NANHU RISCV_CPU_TYPE_NAME("xiangshan-nanhu") +#define TYPE_RISCV_CPU_XIANGSHAN_KMH RISCV_CPU_TYPE_NAME("xiangshan-kunminghu") #define TYPE_RISCV_CPU_HOST RISCV_CPU_TYPE_NAME("host") OBJECT_DECLARE_CPU_TYPE(RISCVCPU, RISCVCPUClass, RISCV_CPU) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index 629ac37..d055ddf 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -127,8 +127,8 @@ const RISCVIsaExtData isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(zaamo, PRIV_VERSION_1_12_0, ext_zaamo), 
ISA_EXT_DATA_ENTRY(zabha, PRIV_VERSION_1_13_0, ext_zabha), ISA_EXT_DATA_ENTRY(zacas, PRIV_VERSION_1_12_0, ext_zacas), - ISA_EXT_DATA_ENTRY(zama16b, PRIV_VERSION_1_13_0, ext_zama16b), ISA_EXT_DATA_ENTRY(zalrsc, PRIV_VERSION_1_12_0, ext_zalrsc), + ISA_EXT_DATA_ENTRY(zama16b, PRIV_VERSION_1_13_0, ext_zama16b), ISA_EXT_DATA_ENTRY(zawrs, PRIV_VERSION_1_12_0, ext_zawrs), ISA_EXT_DATA_ENTRY(zfa, PRIV_VERSION_1_12_0, ext_zfa), ISA_EXT_DATA_ENTRY(zfbfmin, PRIV_VERSION_1_12_0, ext_zfbfmin), @@ -189,6 +189,7 @@ const RISCVIsaExtData isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(zvkt, PRIV_VERSION_1_12_0, ext_zvkt), ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx), ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin), + ISA_EXT_DATA_ENTRY(sdtrig, PRIV_VERSION_1_12_0, debug), ISA_EXT_DATA_ENTRY(shcounterenw, PRIV_VERSION_1_12_0, has_priv_1_12), ISA_EXT_DATA_ENTRY(sha, PRIV_VERSION_1_12_0, ext_sha), ISA_EXT_DATA_ENTRY(shgatpa, PRIV_VERSION_1_12_0, has_priv_1_12), @@ -216,6 +217,7 @@ const RISCVIsaExtData isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(ssnpm, PRIV_VERSION_1_13_0, ext_ssnpm), ISA_EXT_DATA_ENTRY(sspm, PRIV_VERSION_1_13_0, ext_sspm), ISA_EXT_DATA_ENTRY(ssstateen, PRIV_VERSION_1_12_0, ext_ssstateen), + ISA_EXT_DATA_ENTRY(ssstrict, PRIV_VERSION_1_12_0, has_priv_1_12), ISA_EXT_DATA_ENTRY(sstc, PRIV_VERSION_1_12_0, ext_sstc), ISA_EXT_DATA_ENTRY(sstvala, PRIV_VERSION_1_12_0, has_priv_1_12), ISA_EXT_DATA_ENTRY(sstvecd, PRIV_VERSION_1_12_0, has_priv_1_12), @@ -228,6 +230,7 @@ const RISCVIsaExtData isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(svinval, PRIV_VERSION_1_12_0, ext_svinval), ISA_EXT_DATA_ENTRY(svnapot, PRIV_VERSION_1_12_0, ext_svnapot), ISA_EXT_DATA_ENTRY(svpbmt, PRIV_VERSION_1_12_0, ext_svpbmt), + ISA_EXT_DATA_ENTRY(svrsw60t59b, PRIV_VERSION_1_13_0, ext_svrsw60t59b), ISA_EXT_DATA_ENTRY(svukte, PRIV_VERSION_1_13_0, ext_svukte), ISA_EXT_DATA_ENTRY(svvptc, PRIV_VERSION_1_13_0, ext_svvptc), ISA_EXT_DATA_ENTRY(xtheadba, PRIV_VERSION_1_11_0, ext_xtheadba), @@ -1117,6 
+1120,7 @@ static void riscv_cpu_init(Object *obj) cpu->cfg.cbom_blocksize = 64; cpu->cfg.cbop_blocksize = 64; cpu->cfg.cboz_blocksize = 64; + cpu->cfg.pmp_regions = 16; cpu->env.vext_ver = VEXT_VERSION_1_00_0; cpu->cfg.max_satp_mode = -1; @@ -1282,6 +1286,7 @@ const RISCVCPUMultiExtConfig riscv_cpu_extensions[] = { MULTI_EXT_CFG_BOOL("svinval", ext_svinval, false), MULTI_EXT_CFG_BOOL("svnapot", ext_svnapot, false), MULTI_EXT_CFG_BOOL("svpbmt", ext_svpbmt, false), + MULTI_EXT_CFG_BOOL("svrsw60t59b", ext_svrsw60t59b, false), MULTI_EXT_CFG_BOOL("svvptc", ext_svvptc, true), MULTI_EXT_CFG_BOOL("zicntr", ext_zicntr, true), @@ -1375,31 +1380,24 @@ const RISCVCPUMultiExtConfig riscv_cpu_experimental_exts[] = { * 'Named features' is the name we give to extensions that we * don't want to expose to users. They are either immutable * (always enabled/disable) or they'll vary depending on - * the resulting CPU state. They have riscv,isa strings - * and priv_ver like regular extensions. + * the resulting CPU state. + * + * Some of them are always enabled depending on priv version + * of the CPU and are declared directly in isa_edata_arr[]. + * The ones listed here have special checks during finalize() + * time and require their own flags like regular extensions. + * See riscv_cpu_update_named_features() for more info. 
*/ const RISCVCPUMultiExtConfig riscv_cpu_named_features[] = { MULTI_EXT_CFG_BOOL("zic64b", ext_zic64b, true), MULTI_EXT_CFG_BOOL("ssstateen", ext_ssstateen, true), MULTI_EXT_CFG_BOOL("sha", ext_sha, true), - MULTI_EXT_CFG_BOOL("ziccrse", ext_ziccrse, true), - { }, -}; - -/* Deprecated entries marked for future removal */ -const RISCVCPUMultiExtConfig riscv_cpu_deprecated_exts[] = { - MULTI_EXT_CFG_BOOL("Zifencei", ext_zifencei, true), - MULTI_EXT_CFG_BOOL("Zicsr", ext_zicsr, true), - MULTI_EXT_CFG_BOOL("Zihintntl", ext_zihintntl, true), - MULTI_EXT_CFG_BOOL("Zihintpause", ext_zihintpause, true), - MULTI_EXT_CFG_BOOL("Zawrs", ext_zawrs, true), - MULTI_EXT_CFG_BOOL("Zfa", ext_zfa, true), - MULTI_EXT_CFG_BOOL("Zfh", ext_zfh, false), - MULTI_EXT_CFG_BOOL("Zfhmin", ext_zfhmin, false), - MULTI_EXT_CFG_BOOL("Zve32f", ext_zve32f, false), - MULTI_EXT_CFG_BOOL("Zve64f", ext_zve64f, false), - MULTI_EXT_CFG_BOOL("Zve64d", ext_zve64d, false), + /* + * 'ziccrse' has its own flag because the KVM driver + * wants to enable/disable it on its own accord. 
+ */ + MULTI_EXT_CFG_BOOL("ziccrse", ext_ziccrse, true), { }, }; @@ -1568,6 +1566,46 @@ static const PropertyInfo prop_pmp = { .set = prop_pmp_set, }; +static void prop_num_pmp_regions_set(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + RISCVCPU *cpu = RISCV_CPU(obj); + uint8_t value; + + visit_type_uint8(v, name, &value, errp); + + if (cpu->cfg.pmp_regions != value && riscv_cpu_is_vendor(obj)) { + cpu_set_prop_err(cpu, name, errp); + return; + } + + if (cpu->env.priv_ver < PRIV_VERSION_1_12_0 && value > OLD_MAX_RISCV_PMPS) { + error_setg(errp, "Number of PMP regions exceeds maximum available"); + return; + } else if (value > MAX_RISCV_PMPS) { + error_setg(errp, "Number of PMP regions exceeds maximum available"); + return; + } + + cpu_option_add_user_setting(name, value); + cpu->cfg.pmp_regions = value; +} + +static void prop_num_pmp_regions_get(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + uint8_t value = RISCV_CPU(obj)->cfg.pmp_regions; + + visit_type_uint8(v, name, &value, errp); +} + +static const PropertyInfo prop_num_pmp_regions = { + .type = "uint8", + .description = "num-pmp-regions", + .get = prop_num_pmp_regions_get, + .set = prop_num_pmp_regions_set, +}; + static int priv_spec_from_str(const char *priv_spec_str) { int priv_version = -1; @@ -2567,6 +2605,7 @@ static const Property riscv_cpu_properties[] = { {.name = "mmu", .info = &prop_mmu}, {.name = "pmp", .info = &prop_pmp}, + {.name = "num-pmp-regions", .info = &prop_num_pmp_regions}, {.name = "priv_spec", .info = &prop_priv_spec}, {.name = "vext_spec", .info = &prop_vext_spec}, @@ -2595,6 +2634,7 @@ static const Property riscv_cpu_properties[] = { DEFINE_PROP_BOOL("rvv_ta_all_1s", RISCVCPU, cfg.rvv_ta_all_1s, false), DEFINE_PROP_BOOL("rvv_ma_all_1s", RISCVCPU, cfg.rvv_ma_all_1s, false), DEFINE_PROP_BOOL("rvv_vl_half_avl", RISCVCPU, cfg.rvv_vl_half_avl, false), + DEFINE_PROP_BOOL("rvv_vsetvl_x0_vill", RISCVCPU, cfg.rvv_vsetvl_x0_vill, 
false), /* * write_misa() is marked as experimental for now so mark @@ -2937,7 +2977,8 @@ static const TypeInfo riscv_cpu_type_infos[] = { .cfg.max_satp_mode = VM_1_10_MBARE, .cfg.ext_zifencei = true, .cfg.ext_zicsr = true, - .cfg.pmp = true + .cfg.pmp = true, + .cfg.pmp_regions = 8 ), DEFINE_ABSTRACT_RISCV_CPU(TYPE_RISCV_CPU_SIFIVE_U, TYPE_RISCV_VENDOR_CPU, @@ -2948,7 +2989,8 @@ static const TypeInfo riscv_cpu_type_infos[] = { .cfg.ext_zifencei = true, .cfg.ext_zicsr = true, .cfg.mmu = true, - .cfg.pmp = true + .cfg.pmp = true, + .cfg.pmp_regions = 8 ), #if defined(TARGET_RISCV32) || \ @@ -3167,6 +3209,64 @@ static const TypeInfo riscv_cpu_type_infos[] = { .cfg.max_satp_mode = VM_1_10_SV39, ), + DEFINE_RISCV_CPU(TYPE_RISCV_CPU_XIANGSHAN_KMH, TYPE_RISCV_VENDOR_CPU, + .misa_mxl_max = MXL_RV64, + .misa_ext = RVG | RVC | RVB | RVS | RVU | RVH | RVV, + .priv_spec = PRIV_VERSION_1_13_0, + /* + * The RISC-V Instruction Set Manual: Volume I + * Unprivileged Architecture + */ + .cfg.ext_zicntr = true, + .cfg.ext_zihpm = true, + .cfg.ext_zihintntl = true, + .cfg.ext_zihintpause = true, + .cfg.ext_zimop = true, + .cfg.ext_zcmop = true, + .cfg.ext_zicond = true, + .cfg.ext_zawrs = true, + .cfg.ext_zacas = true, + .cfg.ext_zfh = true, + .cfg.ext_zfa = true, + .cfg.ext_zcb = true, + .cfg.ext_zbc = true, + .cfg.ext_zvfh = true, + .cfg.ext_zkn = true, + .cfg.ext_zks = true, + .cfg.ext_zkt = true, + .cfg.ext_zvbb = true, + .cfg.ext_zvkt = true, + /* + * The RISC-V Instruction Set Manual: Volume II + * Privileged Architecture + */ + .cfg.ext_smstateen = true, + .cfg.ext_smcsrind = true, + .cfg.ext_sscsrind = true, + .cfg.ext_svnapot = true, + .cfg.ext_svpbmt = true, + .cfg.ext_svinval = true, + .cfg.ext_sstc = true, + .cfg.ext_sscofpmf = true, + .cfg.ext_ssdbltrp = true, + .cfg.ext_ssnpm = true, + .cfg.ext_smnpm = true, + .cfg.ext_smmpm = true, + .cfg.ext_sspm = true, + .cfg.ext_supm = true, + /* The RISC-V Advanced Interrupt Architecture */ + .cfg.ext_smaia = true, + 
.cfg.ext_ssaia = true, + /* RVA23 Profiles */ + .cfg.ext_zicbom = true, + .cfg.ext_zicbop = true, + .cfg.ext_zicboz = true, + .cfg.ext_svade = true, + .cfg.mmu = true, + .cfg.pmp = true, + .cfg.max_satp_mode = VM_1_10_SV48, + ), + #if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY) DEFINE_RISCV_CPU(TYPE_RISCV_CPU_BASE128, TYPE_RISCV_DYNAMIC_CPU, .cfg.max_satp_mode = VM_1_10_SV57, diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index 229ade9..4a862da 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -82,7 +82,22 @@ typedef struct riscv_cpu_profile { struct riscv_cpu_profile *s_parent; const char *name; uint32_t misa_ext; + /* + * The profile is enabled/disabled via command line or + * via cpu_init(). Enabling a profile will add all its + * mandatory extensions in the CPU during init(). + */ bool enabled; + /* + * The profile is present in the CPU, i.e. the current set of + * CPU extensions complies with it. A profile can be enabled + * and not present (e.g. the user disabled a mandatory extension) + * and the other way around (e.g. all mandatory extensions are + * present in a non-profile CPU). + * + * QMP uses this flag. 
+ */ + bool present; bool user_set; int priv_spec; int satp_mode; @@ -159,7 +174,8 @@ extern RISCVCPUImpliedExtsRule *riscv_multi_ext_implied_rules[]; #define MMU_USER_IDX 3 -#define MAX_RISCV_PMPS (16) +#define MAX_RISCV_PMPS (64) +#define OLD_MAX_RISCV_PMPS (16) #if !defined(CONFIG_USER_ONLY) #include "pmp.h" @@ -936,7 +952,6 @@ extern const RISCVCPUMultiExtConfig riscv_cpu_extensions[]; extern const RISCVCPUMultiExtConfig riscv_cpu_vendor_exts[]; extern const RISCVCPUMultiExtConfig riscv_cpu_experimental_exts[]; extern const RISCVCPUMultiExtConfig riscv_cpu_named_features[]; -extern const RISCVCPUMultiExtConfig riscv_cpu_deprecated_exts[]; typedef struct isa_ext_data { const char *name; diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h index a30317c..b62dd82 100644 --- a/target/riscv/cpu_bits.h +++ b/target/riscv/cpu_bits.h @@ -372,6 +372,18 @@ #define CSR_PMPCFG1 0x3a1 #define CSR_PMPCFG2 0x3a2 #define CSR_PMPCFG3 0x3a3 +#define CSR_PMPCFG4 0x3a4 +#define CSR_PMPCFG5 0x3a5 +#define CSR_PMPCFG6 0x3a6 +#define CSR_PMPCFG7 0x3a7 +#define CSR_PMPCFG8 0x3a8 +#define CSR_PMPCFG9 0x3a9 +#define CSR_PMPCFG10 0x3aa +#define CSR_PMPCFG11 0x3ab +#define CSR_PMPCFG12 0x3ac +#define CSR_PMPCFG13 0x3ad +#define CSR_PMPCFG14 0x3ae +#define CSR_PMPCFG15 0x3af #define CSR_PMPADDR0 0x3b0 #define CSR_PMPADDR1 0x3b1 #define CSR_PMPADDR2 0x3b2 @@ -388,6 +400,54 @@ #define CSR_PMPADDR13 0x3bd #define CSR_PMPADDR14 0x3be #define CSR_PMPADDR15 0x3bf +#define CSR_PMPADDR16 0x3c0 +#define CSR_PMPADDR17 0x3c1 +#define CSR_PMPADDR18 0x3c2 +#define CSR_PMPADDR19 0x3c3 +#define CSR_PMPADDR20 0x3c4 +#define CSR_PMPADDR21 0x3c5 +#define CSR_PMPADDR22 0x3c6 +#define CSR_PMPADDR23 0x3c7 +#define CSR_PMPADDR24 0x3c8 +#define CSR_PMPADDR25 0x3c9 +#define CSR_PMPADDR26 0x3ca +#define CSR_PMPADDR27 0x3cb +#define CSR_PMPADDR28 0x3cc +#define CSR_PMPADDR29 0x3cd +#define CSR_PMPADDR30 0x3ce +#define CSR_PMPADDR31 0x3cf +#define CSR_PMPADDR32 0x3d0 +#define CSR_PMPADDR33 0x3d1 +#define 
CSR_PMPADDR34 0x3d2 +#define CSR_PMPADDR35 0x3d3 +#define CSR_PMPADDR36 0x3d4 +#define CSR_PMPADDR37 0x3d5 +#define CSR_PMPADDR38 0x3d6 +#define CSR_PMPADDR39 0x3d7 +#define CSR_PMPADDR40 0x3d8 +#define CSR_PMPADDR41 0x3d9 +#define CSR_PMPADDR42 0x3da +#define CSR_PMPADDR43 0x3db +#define CSR_PMPADDR44 0x3dc +#define CSR_PMPADDR45 0x3dd +#define CSR_PMPADDR46 0x3de +#define CSR_PMPADDR47 0x3df +#define CSR_PMPADDR48 0x3e0 +#define CSR_PMPADDR49 0x3e1 +#define CSR_PMPADDR50 0x3e2 +#define CSR_PMPADDR51 0x3e3 +#define CSR_PMPADDR52 0x3e4 +#define CSR_PMPADDR53 0x3e5 +#define CSR_PMPADDR54 0x3e6 +#define CSR_PMPADDR55 0x3e7 +#define CSR_PMPADDR56 0x3e8 +#define CSR_PMPADDR57 0x3e9 +#define CSR_PMPADDR58 0x3ea +#define CSR_PMPADDR59 0x3eb +#define CSR_PMPADDR60 0x3ec +#define CSR_PMPADDR61 0x3ed +#define CSR_PMPADDR62 0x3ee +#define CSR_PMPADDR63 0x3ef /* RNMI */ #define CSR_MNSCRATCH 0x740 @@ -675,7 +735,8 @@ typedef enum { #define PTE_SOFT 0x300 /* Reserved for Software */ #define PTE_PBMT 0x6000000000000000ULL /* Page-based memory types */ #define PTE_N 0x8000000000000000ULL /* NAPOT translation */ -#define PTE_RESERVED 0x1FC0000000000000ULL /* Reserved bits */ +#define PTE_RESERVED(svrsw60t59b) \ + (svrsw60t59b ? 
0x07C0000000000000ULL : 0x1FC0000000000000ULL) /* Reserved bits */ #define PTE_ATTR (PTE_N | PTE_PBMT) /* All attributes bits */ /* Page table PPN shift amount */ diff --git a/target/riscv/cpu_cfg_fields.h.inc b/target/riscv/cpu_cfg_fields.h.inc index 59f134a..e2d116f 100644 --- a/target/riscv/cpu_cfg_fields.h.inc +++ b/target/riscv/cpu_cfg_fields.h.inc @@ -57,6 +57,7 @@ BOOL_FIELD(ext_svadu) BOOL_FIELD(ext_svinval) BOOL_FIELD(ext_svnapot) BOOL_FIELD(ext_svpbmt) +BOOL_FIELD(ext_svrsw60t59b) BOOL_FIELD(ext_svvptc) BOOL_FIELD(ext_svukte) BOOL_FIELD(ext_zdinx) @@ -114,6 +115,7 @@ BOOL_FIELD(ext_supm) BOOL_FIELD(rvv_ta_all_1s) BOOL_FIELD(rvv_ma_all_1s) BOOL_FIELD(rvv_vl_half_avl) +BOOL_FIELD(rvv_vsetvl_x0_vill) /* Named features */ BOOL_FIELD(ext_svade) BOOL_FIELD(ext_zic64b) @@ -163,6 +165,7 @@ TYPED_FIELD(uint16_t, elen, 0) TYPED_FIELD(uint16_t, cbom_blocksize, 0) TYPED_FIELD(uint16_t, cbop_blocksize, 0) TYPED_FIELD(uint16_t, cboz_blocksize, 0) +TYPED_FIELD(uint8_t, pmp_regions, 0) TYPED_FIELD(int8_t, max_satp_mode, -1) diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c index 2ed69d7..3479a62 100644 --- a/target/riscv/cpu_helper.c +++ b/target/riscv/cpu_helper.c @@ -1309,6 +1309,7 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, bool svade = riscv_cpu_cfg(env)->ext_svade; bool svadu = riscv_cpu_cfg(env)->ext_svadu; bool adue = svadu ? 
env->menvcfg & MENVCFG_ADUE : !svade; + bool svrsw60t59b = riscv_cpu_cfg(env)->ext_svrsw60t59b; if (first_stage && two_stage && env->virt_enabled) { pbmte = pbmte && (env->henvcfg & HENVCFG_PBMTE); @@ -1376,7 +1377,7 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, if (riscv_cpu_sxl(env) == MXL_RV32) { ppn = pte >> PTE_PPN_SHIFT; } else { - if (pte & PTE_RESERVED) { + if (pte & PTE_RESERVED(svrsw60t59b)) { qemu_log_mask(LOG_GUEST_ERROR, "%s: reserved bits set in PTE: " "addr: 0x%" HWADDR_PRIx " pte: 0x" TARGET_FMT_lx "\n", __func__, pte_addr, pte); diff --git a/target/riscv/csr.c b/target/riscv/csr.c index fb14972..8631be9 100644 --- a/target/riscv/csr.c +++ b/target/riscv/csr.c @@ -738,7 +738,10 @@ static RISCVException dbltrp_hmode(CPURISCVState *env, int csrno) static RISCVException pmp(CPURISCVState *env, int csrno) { if (riscv_cpu_cfg(env)->pmp) { - if (csrno <= CSR_PMPCFG3) { + int max_pmpcfg = (env->priv_ver >= PRIV_VERSION_1_12_0) ? ++ CSR_PMPCFG15 : CSR_PMPCFG3; + + if (csrno <= max_pmpcfg) { uint32_t reg_index = csrno - CSR_PMPCFG0; /* TODO: RV128 restriction check */ @@ -3126,14 +3129,14 @@ static RISCVException write_mscratch(CPURISCVState *env, int csrno, static RISCVException read_mepc(CPURISCVState *env, int csrno, target_ulong *val) { - *val = env->mepc; + *val = env->mepc & get_xepc_mask(env); return RISCV_EXCP_NONE; } static RISCVException write_mepc(CPURISCVState *env, int csrno, target_ulong val, uintptr_t ra) { - env->mepc = val; + env->mepc = val & get_xepc_mask(env); return RISCV_EXCP_NONE; } @@ -3181,6 +3184,7 @@ static RISCVException write_menvcfg(CPURISCVState *env, int csrno, const RISCVCPUConfig *cfg = riscv_cpu_cfg(env); uint64_t mask = MENVCFG_FIOM | MENVCFG_CBIE | MENVCFG_CBCFE | MENVCFG_CBZE | MENVCFG_CDE; + bool stce_changed = false; if (riscv_cpu_mxl(env) == MXL_RV64) { mask |= (cfg->ext_svpbmt ? 
MENVCFG_PBMTE : 0) | @@ -3206,8 +3210,18 @@ static RISCVException write_menvcfg(CPURISCVState *env, int csrno, if ((val & MENVCFG_DTE) == 0) { env->mstatus &= ~MSTATUS_SDT; } + + if (cfg->ext_sstc && + ((env->menvcfg & MENVCFG_STCE) != (val & MENVCFG_STCE))) { + stce_changed = true; + } } env->menvcfg = (env->menvcfg & ~mask) | (val & mask); + + if (stce_changed) { + riscv_timer_stce_changed(env, true, !!(val & MENVCFG_STCE)); + } + return write_henvcfg(env, CSR_HENVCFG, env->henvcfg, ra); } @@ -3230,12 +3244,23 @@ static RISCVException write_menvcfgh(CPURISCVState *env, int csrno, (cfg->ext_smcdeleg ? MENVCFG_CDE : 0) | (cfg->ext_ssdbltrp ? MENVCFG_DTE : 0); uint64_t valh = (uint64_t)val << 32; + bool stce_changed = false; + + if (cfg->ext_sstc && + ((env->menvcfg & MENVCFG_STCE) != (valh & MENVCFG_STCE))) { + stce_changed = true; + } if ((valh & MENVCFG_DTE) == 0) { env->mstatus &= ~MSTATUS_SDT; } env->menvcfg = (env->menvcfg & ~mask) | (valh & mask); + + if (stce_changed) { + riscv_timer_stce_changed(env, true, !!(valh & MENVCFG_STCE)); + } + return write_henvcfgh(env, CSR_HENVCFGH, env->henvcfg >> 32, ra); } @@ -3313,8 +3338,10 @@ static RISCVException read_henvcfg(CPURISCVState *env, int csrno, static RISCVException write_henvcfg(CPURISCVState *env, int csrno, target_ulong val, uintptr_t ra) { + const RISCVCPUConfig *cfg = riscv_cpu_cfg(env); uint64_t mask = HENVCFG_FIOM | HENVCFG_CBIE | HENVCFG_CBCFE | HENVCFG_CBZE; RISCVException ret; + bool stce_changed = false; ret = smstateen_acc_ok(env, 0, SMSTATEEN0_HSENVCFG); if (ret != RISCV_EXCP_NONE) { @@ -3340,6 +3367,11 @@ static RISCVException write_henvcfg(CPURISCVState *env, int csrno, get_field(val, HENVCFG_PMM) != PMM_FIELD_RESERVED) { mask |= HENVCFG_PMM; } + + if (cfg->ext_sstc && + ((env->henvcfg & HENVCFG_STCE) != (val & HENVCFG_STCE))) { + stce_changed = true; + } } env->henvcfg = val & mask; @@ -3347,6 +3379,10 @@ static RISCVException write_henvcfg(CPURISCVState *env, int csrno, env->vsstatus &= 
~MSTATUS_SDT; } + if (stce_changed) { + riscv_timer_stce_changed(env, false, !!(val & HENVCFG_STCE)); + } + return RISCV_EXCP_NONE; } @@ -3368,19 +3404,32 @@ static RISCVException read_henvcfgh(CPURISCVState *env, int csrno, static RISCVException write_henvcfgh(CPURISCVState *env, int csrno, target_ulong val, uintptr_t ra) { + const RISCVCPUConfig *cfg = riscv_cpu_cfg(env); uint64_t mask = env->menvcfg & (HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_ADUE | HENVCFG_DTE); uint64_t valh = (uint64_t)val << 32; RISCVException ret; + bool stce_changed = false; ret = smstateen_acc_ok(env, 0, SMSTATEEN0_HSENVCFG); if (ret != RISCV_EXCP_NONE) { return ret; } + + if (cfg->ext_sstc && + ((env->henvcfg & HENVCFG_STCE) != (valh & HENVCFG_STCE))) { + stce_changed = true; + } + env->henvcfg = (env->henvcfg & 0xFFFFFFFF) | (valh & mask); if ((env->henvcfg & HENVCFG_DTE) == 0) { env->vsstatus &= ~MSTATUS_SDT; } + + if (stce_changed) { + riscv_timer_stce_changed(env, false, !!(val & HENVCFG_STCE)); + } + return RISCV_EXCP_NONE; } @@ -3651,7 +3700,14 @@ static RISCVException rmw_mip64(CPURISCVState *env, int csrno, if (riscv_cpu_cfg(env)->ext_sstc && (env->priv == PRV_M) && get_field(env->menvcfg, MENVCFG_STCE)) { /* sstc extension forbids STIP & VSTIP to be writeable in mip */ - mask = mask & ~(MIP_STIP | MIP_VSTIP); + + /* STIP is not writable when menvcfg.STCE is enabled. */ + mask = mask & ~MIP_STIP; + + /* VSTIP is not writable when both [mh]envcfg.STCE are enabled. 
*/ + if (get_field(env->henvcfg, HENVCFG_STCE)) { + mask = mask & ~MIP_VSTIP; + } } if (mask) { @@ -4113,14 +4169,14 @@ static RISCVException write_sscratch(CPURISCVState *env, int csrno, static RISCVException read_sepc(CPURISCVState *env, int csrno, target_ulong *val) { - *val = env->sepc; + *val = env->sepc & get_xepc_mask(env); return RISCV_EXCP_NONE; } static RISCVException write_sepc(CPURISCVState *env, int csrno, target_ulong val, uintptr_t ra) { - env->sepc = val; + env->sepc = val & get_xepc_mask(env); return RISCV_EXCP_NONE; } @@ -6111,6 +6167,30 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { [CSR_PMPCFG1] = { "pmpcfg1", pmp, read_pmpcfg, write_pmpcfg }, [CSR_PMPCFG2] = { "pmpcfg2", pmp, read_pmpcfg, write_pmpcfg }, [CSR_PMPCFG3] = { "pmpcfg3", pmp, read_pmpcfg, write_pmpcfg }, + [CSR_PMPCFG4] = { "pmpcfg4", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG5] = { "pmpcfg5", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG6] = { "pmpcfg6", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG7] = { "pmpcfg7", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG8] = { "pmpcfg8", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG9] = { "pmpcfg9", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG10] = { "pmpcfg10", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG11] = { "pmpcfg11", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG12] = { "pmpcfg12", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG13] = { "pmpcfg13", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG14] = { "pmpcfg14", pmp, read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPCFG15] = { "pmpcfg15", pmp, 
read_pmpcfg, write_pmpcfg, + .min_priv_ver = PRIV_VERSION_1_12_0 }, [CSR_PMPADDR0] = { "pmpaddr0", pmp, read_pmpaddr, write_pmpaddr }, [CSR_PMPADDR1] = { "pmpaddr1", pmp, read_pmpaddr, write_pmpaddr }, [CSR_PMPADDR2] = { "pmpaddr2", pmp, read_pmpaddr, write_pmpaddr }, @@ -6125,8 +6205,104 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { [CSR_PMPADDR11] = { "pmpaddr11", pmp, read_pmpaddr, write_pmpaddr }, [CSR_PMPADDR12] = { "pmpaddr12", pmp, read_pmpaddr, write_pmpaddr }, [CSR_PMPADDR13] = { "pmpaddr13", pmp, read_pmpaddr, write_pmpaddr }, - [CSR_PMPADDR14] = { "pmpaddr14", pmp, read_pmpaddr, write_pmpaddr }, - [CSR_PMPADDR15] = { "pmpaddr15", pmp, read_pmpaddr, write_pmpaddr }, + [CSR_PMPADDR14] = { "pmpaddr14", pmp, read_pmpaddr, write_pmpaddr }, + [CSR_PMPADDR15] = { "pmpaddr15", pmp, read_pmpaddr, write_pmpaddr }, + [CSR_PMPADDR16] = { "pmpaddr16", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR17] = { "pmpaddr17", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR18] = { "pmpaddr18", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR19] = { "pmpaddr19", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR20] = { "pmpaddr20", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR21] = { "pmpaddr21", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR22] = { "pmpaddr22", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR23] = { "pmpaddr23", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR24] = { "pmpaddr24", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR25] = { "pmpaddr25", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR26] = { "pmpaddr26", pmp, read_pmpaddr, 
write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR27] = { "pmpaddr27", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR28] = { "pmpaddr28", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR29] = { "pmpaddr29", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR30] = { "pmpaddr30", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR31] = { "pmpaddr31", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR32] = { "pmpaddr32", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR33] = { "pmpaddr33", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR34] = { "pmpaddr34", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR35] = { "pmpaddr35", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR36] = { "pmpaddr36", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR37] = { "pmpaddr37", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR38] = { "pmpaddr38", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR39] = { "pmpaddr39", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR40] = { "pmpaddr40", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR41] = { "pmpaddr41", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR42] = { "pmpaddr42", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR43] = { "pmpaddr43", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR44] = { "pmpaddr44", pmp, read_pmpaddr, write_pmpaddr, + 
.min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR45] = { "pmpaddr45", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR46] = { "pmpaddr46", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR47] = { "pmpaddr47", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR48] = { "pmpaddr48", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR49] = { "pmpaddr49", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR50] = { "pmpaddr50", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR51] = { "pmpaddr51", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR52] = { "pmpaddr52", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR53] = { "pmpaddr53", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR54] = { "pmpaddr54", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR55] = { "pmpaddr55", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR56] = { "pmpaddr56", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR57] = { "pmpaddr57", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR58] = { "pmpaddr58", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR59] = { "pmpaddr59", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR60] = { "pmpaddr60", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR61] = { "pmpaddr61", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR62] = { "pmpaddr62", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = 
PRIV_VERSION_1_12_0 }, + [CSR_PMPADDR63] = { "pmpaddr63", pmp, read_pmpaddr, write_pmpaddr, + .min_priv_ver = PRIV_VERSION_1_12_0 }, /* Debug CSRs */ [CSR_TSELECT] = { "tselect", debug, read_tselect, write_tselect }, diff --git a/target/riscv/fpu_helper.c b/target/riscv/fpu_helper.c index 706bdfa..af40561 100644 --- a/target/riscv/fpu_helper.c +++ b/target/riscv/fpu_helper.c @@ -755,6 +755,6 @@ uint64_t helper_fcvt_bf16_s(CPURISCVState *env, uint64_t rs1) uint64_t helper_fcvt_s_bf16(CPURISCVState *env, uint64_t rs1) { - float16 frs1 = check_nanbox_h(env, rs1); + float16 frs1 = check_nanbox_bf16(env, rs1); return nanbox_s(env, bfloat16_to_float32(frs1, &env->fp_status)); } diff --git a/target/riscv/helper.h b/target/riscv/helper.h index 85d73e4..f712b1c 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -159,7 +159,7 @@ DEF_HELPER_FLAGS_3(hyp_hsv_d, TCG_CALL_NO_WG, void, env, tl, tl) #endif /* Vector functions */ -DEF_HELPER_3(vsetvl, tl, env, tl, tl) +DEF_HELPER_4(vsetvl, tl, env, tl, tl, tl) DEF_HELPER_5(vle8_v, void, ptr, ptr, tl, env, i32) DEF_HELPER_5(vle16_v, void, ptr, ptr, tl, env, i32) DEF_HELPER_5(vle32_v, void, ptr, ptr, tl, env, i32) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index 2b6077a..610bf9f 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -202,7 +202,7 @@ static bool do_vsetvl(DisasContext *s, int rd, int rs1, TCGv s2) s1 = get_gpr(s, rs1, EXT_ZERO); } - gen_helper_vsetvl(dst, tcg_env, s1, s2); + gen_helper_vsetvl(dst, tcg_env, s1, s2, tcg_constant_tl((int) (rd == 0 && rs1 == 0))); gen_set_gpr(s, rd, dst); finalize_rvv_inst(s); @@ -222,7 +222,7 @@ static bool do_vsetivli(DisasContext *s, int rd, TCGv s1, TCGv s2) dst = dest_gpr(s, rd); - gen_helper_vsetvl(dst, tcg_env, s1, s2); + gen_helper_vsetvl(dst, tcg_env, s1, s2, tcg_constant_tl(0)); gen_set_gpr(s, rd, dst); finalize_rvv_inst(s); gen_update_pc(s, s->cur_insn_len); @@ 
-1361,6 +1361,12 @@ static bool ldff_trans(uint32_t vd, uint32_t rs1, uint32_t data, fn(dest, mask, base, tcg_env, desc); finalize_rvv_inst(s); + + /* vector unit-stride fault-only-first load may modify vl CSR */ + gen_update_pc(s, s->cur_insn_len); + lookup_and_goto_ptr(s); + s->base.is_jmp = DISAS_NORETURN; + return true; } diff --git a/target/riscv/internals.h b/target/riscv/internals.h index 4570bd5..172296f 100644 --- a/target/riscv/internals.h +++ b/target/riscv/internals.h @@ -142,6 +142,33 @@ static inline float16 check_nanbox_h(CPURISCVState *env, uint64_t f) } } +static inline float16 check_nanbox_bf16(CPURISCVState *env, uint64_t f) +{ + /* Disable nanbox check when enable zfinx */ + if (env_archcpu(env)->cfg.ext_zfinx) { + return (uint16_t)f; + } + + uint64_t mask = MAKE_64BIT_MASK(16, 48); + + if (likely((f & mask) == mask)) { + return (uint16_t)f; + } else { + return 0x7FC0u; /* default qnan */ + } +} + +static inline target_ulong get_xepc_mask(CPURISCVState *env) +{ + /* When IALIGN=32, both low bits must be zero. + * When IALIGN=16 (has C extension), only bit 0 must be zero. 
*/ + if (riscv_has_ext(env, RVC)) { + return ~(target_ulong)1; + } else { + return ~(target_ulong)3; + } +} + #ifndef CONFIG_USER_ONLY /* Our implementation of SysemuCPUOps::has_work */ bool riscv_cpu_has_work(CPUState *cs); diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c index e1a04be..5c19062 100644 --- a/target/riscv/kvm/kvm-cpu.c +++ b/target/riscv/kvm/kvm-cpu.c @@ -999,6 +999,19 @@ static void kvm_riscv_destroy_scratch_vcpu(KVMScratchCPU *scratch) close(scratch->kvmfd); } +static void kvm_riscv_init_max_satp_mode(RISCVCPU *cpu, KVMScratchCPU *kvmcpu) +{ + struct kvm_one_reg reg; + int ret; + + reg.id = RISCV_CONFIG_REG(satp_mode); + reg.addr = (uint64_t)&cpu->cfg.max_satp_mode; + ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, &reg); + if (ret != 0) { + error_report("Unable to retrieve satp mode from host, error %d", ret); + } +} + static void kvm_riscv_init_machine_ids(RISCVCPU *cpu, KVMScratchCPU *kvmcpu) { struct kvm_one_reg reg; @@ -1302,6 +1315,7 @@ static void riscv_init_kvm_registers(Object *cpu_obj) kvm_riscv_init_machine_ids(cpu, &kvmcpu); kvm_riscv_init_misa_ext_mask(cpu, &kvmcpu); kvm_riscv_init_cfg(cpu, &kvmcpu); + kvm_riscv_init_max_satp_mode(cpu, &kvmcpu); kvm_riscv_destroy_scratch_vcpu(&kvmcpu); } @@ -1605,7 +1619,7 @@ static void kvm_riscv_handle_sbi_dbcn(CPUState *cs, struct kvm_run *run) break; case SBI_EXT_DBCN_CONSOLE_WRITE_BYTE: ch = run->riscv_sbi.args[0]; - ret = qemu_chr_fe_write(serial_hd(0)->be, &ch, sizeof(ch)); + ret = qemu_chr_fe_write_all(serial_hd(0)->be, &ch, sizeof(ch)); if (ret < 0) { error_report("SBI_EXT_DBCN_CONSOLE_WRITE_BYTE: error when " @@ -1985,7 +1999,7 @@ static bool kvm_cpu_realize(CPUState *cs, Error **errp) } } - return true; + return true; } void riscv_kvm_cpu_finalize_features(RISCVCPU *cpu, Error **errp) diff --git a/target/riscv/machine.c b/target/riscv/machine.c index c97e9ce..1600ec4 100644 --- a/target/riscv/machine.c +++ b/target/riscv/machine.c @@ -36,8 +36,9 @@ static int pmp_post_load(void 
*opaque, int version_id) RISCVCPU *cpu = opaque; CPURISCVState *env = &cpu->env; int i; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; - for (i = 0; i < MAX_RISCV_PMPS; i++) { + for (i = 0; i < pmp_regions; i++) { pmp_update_rule_addr(env, i); } pmp_update_rule_nums(env); diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c index 557807b..15460bf 100644 --- a/target/riscv/op_helper.c +++ b/target/riscv/op_helper.c @@ -280,7 +280,7 @@ target_ulong helper_sret(CPURISCVState *env) riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); } - target_ulong retpc = env->sepc; + target_ulong retpc = env->sepc & get_xepc_mask(env); if (!riscv_cpu_allow_16bit_insn(&env_archcpu(env)->cfg, env->priv_ver, env->misa_ext) && (retpc & 0x3)) { @@ -391,7 +391,7 @@ static target_ulong ssdbltrp_mxret(CPURISCVState *env, target_ulong mstatus, target_ulong helper_mret(CPURISCVState *env) { - target_ulong retpc = env->mepc; + target_ulong retpc = env->mepc & get_xepc_mask(env); uint64_t mstatus = env->mstatus; target_ulong prev_priv = get_field(mstatus, MSTATUS_MPP); diff --git a/target/riscv/pmp.c b/target/riscv/pmp.c index 5af295e..3540327 100644 --- a/target/riscv/pmp.c +++ b/target/riscv/pmp.c @@ -122,7 +122,9 @@ uint32_t pmp_get_num_rules(CPURISCVState *env) */ static inline uint8_t pmp_read_cfg(CPURISCVState *env, uint32_t pmp_index) { - if (pmp_index < MAX_RISCV_PMPS) { + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; + + if (pmp_index < pmp_regions) { return env->pmp_state.pmp[pmp_index].cfg_reg; } @@ -136,7 +138,9 @@ static inline uint8_t pmp_read_cfg(CPURISCVState *env, uint32_t pmp_index) */ static bool pmp_write_cfg(CPURISCVState *env, uint32_t pmp_index, uint8_t val) { - if (pmp_index < MAX_RISCV_PMPS) { + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; + + if (pmp_index < pmp_regions) { if (env->pmp_state.pmp[pmp_index].cfg_reg == val) { /* no change */ return false; @@ -236,9 +240,10 @@ void pmp_update_rule_addr(CPURISCVState *env, 
uint32_t pmp_index) void pmp_update_rule_nums(CPURISCVState *env) { int i; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; env->pmp_state.num_rules = 0; - for (i = 0; i < MAX_RISCV_PMPS; i++) { + for (i = 0; i < pmp_regions; i++) { const uint8_t a_field = pmp_get_a_field(env->pmp_state.pmp[i].cfg_reg); if (PMP_AMATCH_OFF != a_field) { @@ -332,6 +337,7 @@ bool pmp_hart_has_privs(CPURISCVState *env, hwaddr addr, int pmp_size = 0; hwaddr s = 0; hwaddr e = 0; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; /* Short cut if no rules */ if (0 == pmp_get_num_rules(env)) { @@ -356,7 +362,7 @@ bool pmp_hart_has_privs(CPURISCVState *env, hwaddr addr, * 1.10 draft priv spec states there is an implicit order * from low to high */ - for (i = 0; i < MAX_RISCV_PMPS; i++) { + for (i = 0; i < pmp_regions; i++) { s = pmp_is_in_range(env, i, addr); e = pmp_is_in_range(env, i, addr + pmp_size - 1); @@ -527,8 +533,9 @@ void pmpaddr_csr_write(CPURISCVState *env, uint32_t addr_index, { trace_pmpaddr_csr_write(env->mhartid, addr_index, val); bool is_next_cfg_tor = false; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; - if (addr_index < MAX_RISCV_PMPS) { + if (addr_index < pmp_regions) { if (env->pmp_state.pmp[addr_index].addr_reg == val) { /* no change */ return; @@ -538,7 +545,7 @@ void pmpaddr_csr_write(CPURISCVState *env, uint32_t addr_index, * In TOR mode, need to check the lock bit of the next pmp * (if there is a next). 
*/ - if (addr_index + 1 < MAX_RISCV_PMPS) { + if (addr_index + 1 < pmp_regions) { uint8_t pmp_cfg = env->pmp_state.pmp[addr_index + 1].cfg_reg; is_next_cfg_tor = PMP_AMATCH_TOR == pmp_get_a_field(pmp_cfg); @@ -573,8 +580,9 @@ void pmpaddr_csr_write(CPURISCVState *env, uint32_t addr_index, target_ulong pmpaddr_csr_read(CPURISCVState *env, uint32_t addr_index) { target_ulong val = 0; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; - if (addr_index < MAX_RISCV_PMPS) { + if (addr_index < pmp_regions) { val = env->pmp_state.pmp[addr_index].addr_reg; trace_pmpaddr_csr_read(env->mhartid, addr_index, val); } else { @@ -592,6 +600,7 @@ void mseccfg_csr_write(CPURISCVState *env, target_ulong val) { int i; uint64_t mask = MSECCFG_MMWP | MSECCFG_MML; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; /* Update PMM field only if the value is valid according to Zjpm v1.0 */ if (riscv_cpu_cfg(env)->ext_smmpm && riscv_cpu_mxl(env) == MXL_RV64 && @@ -603,7 +612,7 @@ void mseccfg_csr_write(CPURISCVState *env, target_ulong val) /* RLB cannot be enabled if it's already 0 and if any regions are locked */ if (!MSECCFG_RLB_ISSET(env)) { - for (i = 0; i < MAX_RISCV_PMPS; i++) { + for (i = 0; i < pmp_regions; i++) { if (pmp_is_locked(env, i)) { val &= ~MSECCFG_RLB; break; @@ -659,6 +668,7 @@ target_ulong pmp_get_tlb_size(CPURISCVState *env, hwaddr addr) hwaddr tlb_sa = addr & ~(TARGET_PAGE_SIZE - 1); hwaddr tlb_ea = tlb_sa + TARGET_PAGE_SIZE - 1; int i; + uint8_t pmp_regions = riscv_cpu_cfg(env)->pmp_regions; /* * If PMP is not supported or there are no PMP rules, the TLB page will not @@ -669,7 +679,7 @@ target_ulong pmp_get_tlb_size(CPURISCVState *env, hwaddr addr) return TARGET_PAGE_SIZE; } - for (i = 0; i < MAX_RISCV_PMPS; i++) { + for (i = 0; i < pmp_regions; i++) { if (pmp_get_a_field(env->pmp_state.pmp[i].cfg_reg) == PMP_AMATCH_OFF) { continue; } diff --git a/target/riscv/riscv-qmp-cmds.c b/target/riscv/riscv-qmp-cmds.c index 8ba8aa0..8a1856c 100644 --- 
a/target/riscv/riscv-qmp-cmds.c +++ b/target/riscv/riscv-qmp-cmds.c @@ -121,7 +121,7 @@ static void riscv_obj_add_profiles_qdict(Object *obj, QDict *qdict_out) for (int i = 0; riscv_profiles[i] != NULL; i++) { profile = riscv_profiles[i]; - value = QOBJECT(qbool_from_bool(profile->enabled)); + value = QOBJECT(qbool_from_bool(profile->present)); qdict_put_obj(qdict_out, profile->name, value); } diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c index 55fd9e5..78fb279 100644 --- a/target/riscv/tcg/tcg-cpu.c +++ b/target/riscv/tcg/tcg-cpu.c @@ -451,6 +451,15 @@ static void riscv_cpu_disable_priv_spec_isa_exts(RISCVCPU *cpu) continue; } + /* + * cpu.debug = true is marked as 'sdtrig', priv spec 1.12. + * Skip this warning since existing CPUs with older priv + * spec and debug = true will be impacted. + */ + if (!strcmp(edata->name, "sdtrig")) { + continue; + } + isa_ext_update_enabled(cpu, edata->ext_enable_offset, false); /* @@ -830,6 +839,12 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) cpu->cfg.ext_ssctr = false; } + if (cpu->cfg.ext_svrsw60t59b && + (!cpu->cfg.mmu || mcc->def->misa_mxl_max == MXL_RV32)) { + error_setg(errp, "svrsw60t59b is not supported on RV32 and MMU-less platforms"); + return; + } + /* * Disable isa extensions based on priv spec after we * validated and set everything we need. 
@@ -867,16 +882,11 @@ static void riscv_cpu_check_parent_profile(RISCVCPU *cpu, RISCVCPUProfile *profile, RISCVCPUProfile *parent) { - const char *parent_name; - bool parent_enabled; - - if (!profile->enabled || !parent) { + if (!profile->present || !parent) { return; } - parent_name = parent->name; - parent_enabled = object_property_get_bool(OBJECT(cpu), parent_name, NULL); - profile->enabled = parent_enabled; + profile->present = parent->present; } static void riscv_cpu_validate_profile(RISCVCPU *cpu, @@ -937,7 +947,7 @@ static void riscv_cpu_validate_profile(RISCVCPU *cpu, } } - profile->enabled = profile_impl; + profile->present = profile_impl; riscv_cpu_check_parent_profile(cpu, profile, profile->u_parent); riscv_cpu_check_parent_profile(cpu, profile, profile->s_parent); @@ -1166,6 +1176,70 @@ static bool riscv_cpu_is_generic(Object *cpu_obj) return object_dynamic_cast(cpu_obj, TYPE_RISCV_DYNAMIC_CPU) != NULL; } +static void riscv_cpu_set_profile(RISCVCPU *cpu, + RISCVCPUProfile *profile, + bool enabled) +{ + int i, ext_offset; + + if (profile->u_parent != NULL) { + riscv_cpu_set_profile(cpu, profile->u_parent, enabled); + } + + if (profile->s_parent != NULL) { + riscv_cpu_set_profile(cpu, profile->s_parent, enabled); + } + + profile->enabled = enabled; + + if (profile->enabled) { + cpu->env.priv_ver = profile->priv_spec; + +#ifndef CONFIG_USER_ONLY + if (profile->satp_mode != RISCV_PROFILE_ATTR_UNUSED) { + object_property_set_bool(OBJECT(cpu), "mmu", true, NULL); + const char *satp_prop = satp_mode_str(profile->satp_mode, + riscv_cpu_is_32bit(cpu)); + object_property_set_bool(OBJECT(cpu), satp_prop, true, NULL); + } +#endif + } + + for (i = 0; misa_bits[i] != 0; i++) { + uint32_t bit = misa_bits[i]; + + if (!(profile->misa_ext & bit)) { + continue; + } + + if (bit == RVI && !profile->enabled) { + /* + * Disabling profiles will not disable the base + * ISA RV64I. 
+ */ + continue; + } + + cpu_misa_ext_add_user_opt(bit, profile->enabled); + riscv_cpu_write_misa_bit(cpu, bit, profile->enabled); + } + + for (i = 0; profile->ext_offsets[i] != RISCV_PROFILE_EXT_LIST_END; i++) { + ext_offset = profile->ext_offsets[i]; + + if (profile->enabled) { + if (cpu_cfg_offset_is_named_feat(ext_offset)) { + riscv_cpu_enable_named_feat(cpu, ext_offset); + } + + cpu_bump_multi_ext_priv_ver(&cpu->env, ext_offset); + } + + cpu_cfg_ext_add_user_opt(ext_offset, profile->enabled); + isa_ext_update_enabled(cpu, ext_offset, profile->enabled); + } +} + /* * We'll get here via the following path: * @@ -1332,7 +1406,6 @@ static void cpu_set_profile(Object *obj, Visitor *v, const char *name, RISCVCPUProfile *profile = opaque; RISCVCPU *cpu = RISCV_CPU(obj); bool value; - int i, ext_offset; if (riscv_cpu_is_vendor(obj)) { error_setg(errp, "Profile %s is not available for vendor CPUs", @@ -1351,64 +1424,8 @@ static void cpu_set_profile(Object *obj, Visitor *v, const char *name, } profile->user_set = true; - profile->enabled = value; - if (profile->u_parent != NULL) { - object_property_set_bool(obj, profile->u_parent->name, - profile->enabled, NULL); - } - - if (profile->s_parent != NULL) { - object_property_set_bool(obj, profile->s_parent->name, - profile->enabled, NULL); - } - - if (profile->enabled) { - cpu->env.priv_ver = profile->priv_spec; - } - -#ifndef CONFIG_USER_ONLY - if (profile->satp_mode != RISCV_PROFILE_ATTR_UNUSED) { - object_property_set_bool(obj, "mmu", true, NULL); - const char *satp_prop = satp_mode_str(profile->satp_mode, - riscv_cpu_is_32bit(cpu)); - object_property_set_bool(obj, satp_prop, profile->enabled, NULL); - } -#endif - - for (i = 0; misa_bits[i] != 0; i++) { - uint32_t bit = misa_bits[i]; - - if (!(profile->misa_ext & bit)) { - continue; - } - - if (bit == RVI && !profile->enabled) { - /* - * Disabling profiles will not disable the base - * ISA RV64I. 
- */ - continue; - } - - cpu_misa_ext_add_user_opt(bit, profile->enabled); - riscv_cpu_write_misa_bit(cpu, bit, profile->enabled); - } - - for (i = 0; profile->ext_offsets[i] != RISCV_PROFILE_EXT_LIST_END; i++) { - ext_offset = profile->ext_offsets[i]; - - if (profile->enabled) { - if (cpu_cfg_offset_is_named_feat(ext_offset)) { - riscv_cpu_enable_named_feat(cpu, ext_offset); - } - - cpu_bump_multi_ext_priv_ver(&cpu->env, ext_offset); - } - - cpu_cfg_ext_add_user_opt(ext_offset, profile->enabled); - isa_ext_update_enabled(cpu, ext_offset, profile->enabled); - } + riscv_cpu_set_profile(cpu, profile, value); } static void cpu_get_profile(Object *obj, Visitor *v, const char *name, @@ -1423,7 +1440,7 @@ static void cpu_get_profile(Object *obj, Visitor *v, const char *name, static void riscv_cpu_add_profiles(Object *cpu_obj) { for (int i = 0; riscv_profiles[i] != NULL; i++) { - const RISCVCPUProfile *profile = riscv_profiles[i]; + RISCVCPUProfile *profile = riscv_profiles[i]; object_property_add(cpu_obj, profile->name, "bool", cpu_get_profile, cpu_set_profile, @@ -1435,30 +1452,11 @@ static void riscv_cpu_add_profiles(Object *cpu_obj) * case. */ if (profile->enabled) { - object_property_set_bool(cpu_obj, profile->name, true, NULL); + riscv_cpu_set_profile(RISCV_CPU(cpu_obj), profile, true); } } } -static bool cpu_ext_is_deprecated(const char *ext_name) -{ - return isupper(ext_name[0]); -} - -/* - * String will be allocated in the heap. Caller is responsible - * for freeing it. 
- */ -static char *cpu_ext_to_lower(const char *ext_name) -{ - char *ret = g_malloc0(strlen(ext_name) + 1); - - strcpy(ret, ext_name); - ret[0] = tolower(ret[0]); - - return ret; -} - static void cpu_set_multi_ext_cfg(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { @@ -1471,13 +1469,6 @@ static void cpu_set_multi_ext_cfg(Object *obj, Visitor *v, const char *name, return; } - if (cpu_ext_is_deprecated(multi_ext_cfg->name)) { - g_autofree char *lower = cpu_ext_to_lower(multi_ext_cfg->name); - - warn_report("CPU property '%s' is deprecated. Please use '%s' instead", - multi_ext_cfg->name, lower); - } - cpu_cfg_ext_add_user_opt(multi_ext_cfg->offset, value); prev_val = isa_ext_is_enabled(cpu, multi_ext_cfg->offset); @@ -1513,14 +1504,13 @@ static void cpu_add_multi_ext_prop(Object *cpu_obj, const RISCVCPUMultiExtConfig *multi_cfg) { bool generic_cpu = riscv_cpu_is_generic(cpu_obj); - bool deprecated_ext = cpu_ext_is_deprecated(multi_cfg->name); object_property_add(cpu_obj, multi_cfg->name, "bool", cpu_get_multi_ext_cfg, cpu_set_multi_ext_cfg, NULL, (void *)multi_cfg); - if (!generic_cpu || deprecated_ext) { + if (!generic_cpu) { return; } @@ -1563,8 +1553,6 @@ static void riscv_cpu_add_user_properties(Object *obj) riscv_cpu_add_multiext_prop_array(obj, riscv_cpu_vendor_exts); riscv_cpu_add_multiext_prop_array(obj, riscv_cpu_experimental_exts); - riscv_cpu_add_multiext_prop_array(obj, riscv_cpu_deprecated_exts); - riscv_cpu_add_profiles(obj); } @@ -1606,6 +1594,8 @@ static void riscv_init_max_cpu_extensions(Object *obj) if (env->misa_mxl != MXL_RV32) { isa_ext_update_enabled(cpu, CPU_CFG_OFFSET(ext_zcf), false); + } else { + isa_ext_update_enabled(cpu, CPU_CFG_OFFSET(ext_svrsw60t59b), false); } /* diff --git a/target/riscv/time_helper.c b/target/riscv/time_helper.c index bc0d9a0..400e917 100644 --- a/target/riscv/time_helper.c +++ b/target/riscv/time_helper.c @@ -46,8 +46,23 @@ void riscv_timer_write_timecmp(CPURISCVState *env, QEMUTimer *timer, 
{ uint64_t diff, ns_diff, next; RISCVAclintMTimerState *mtimer = env->rdtime_fn_arg; - uint32_t timebase_freq = mtimer->timebase_freq; - uint64_t rtc_r = env->rdtime_fn(env->rdtime_fn_arg) + delta; + uint32_t timebase_freq; + uint64_t rtc_r; + + if (!riscv_cpu_cfg(env)->ext_sstc || !env->rdtime_fn || + !env->rdtime_fn_arg || !get_field(env->menvcfg, MENVCFG_STCE)) { + /* S/VS Timer IRQ depends on sstc extension, rdtime_fn(), and STCE. */ + return; + } + + if (timer_irq == MIP_VSTIP && + (!riscv_has_ext(env, RVH) || !get_field(env->henvcfg, HENVCFG_STCE))) { + /* VS Timer IRQ also depends on RVH and henvcfg.STCE. */ + return; + } + + timebase_freq = mtimer->timebase_freq; + rtc_r = env->rdtime_fn(env->rdtime_fn_arg) + delta; if (timecmp <= rtc_r) { /* @@ -125,6 +140,52 @@ void riscv_timer_write_timecmp(CPURISCVState *env, QEMUTimer *timer, timer_mod(timer, next); } +/* + * When disabling xenvcfg.STCE, the S/VS Timer may be disabled at the same time. + * It is safe to call this function regardless of whether the timer has been + * deleted or not. timer_del() will do nothing if the timer has already + * been deleted. 
+ */ +static void riscv_timer_disable_timecmp(CPURISCVState *env, QEMUTimer *timer, + uint32_t timer_irq) +{ + /* Disable S-mode Timer IRQ and HW-based STIP */ + if ((timer_irq == MIP_STIP) && !get_field(env->menvcfg, MENVCFG_STCE)) { + riscv_cpu_update_mip(env, timer_irq, BOOL_TO_MASK(0)); + timer_del(timer); + return; + } + + /* Disable VS-mode Timer IRQ and HW-based VSTIP */ + if ((timer_irq == MIP_VSTIP) && + (!get_field(env->menvcfg, MENVCFG_STCE) || + !get_field(env->henvcfg, HENVCFG_STCE))) { + env->vstime_irq = 0; + riscv_cpu_update_mip(env, 0, BOOL_TO_MASK(0)); + timer_del(timer); + return; + } +} + +/* Enable or disable S/VS-mode Timer when xenvcfg.STCE is changed */ +void riscv_timer_stce_changed(CPURISCVState *env, bool is_m_mode, bool enable) +{ + if (enable) { + riscv_timer_write_timecmp(env, env->vstimer, env->vstimecmp, + env->htimedelta, MIP_VSTIP); + } else { + riscv_timer_disable_timecmp(env, env->vstimer, MIP_VSTIP); + } + + if (is_m_mode) { + if (enable) { + riscv_timer_write_timecmp(env, env->stimer, env->stimecmp, 0, MIP_STIP); + } else { + riscv_timer_disable_timecmp(env, env->stimer, MIP_STIP); + } + } +} + void riscv_timer_init(RISCVCPU *cpu) { CPURISCVState *env; diff --git a/target/riscv/time_helper.h b/target/riscv/time_helper.h index cacd79b..af1f634 100644 --- a/target/riscv/time_helper.h +++ b/target/riscv/time_helper.h @@ -25,6 +25,7 @@ void riscv_timer_write_timecmp(CPURISCVState *env, QEMUTimer *timer, uint64_t timecmp, uint64_t delta, uint32_t timer_irq); +void riscv_timer_stce_changed(CPURISCVState *env, bool is_m_mode, bool enable); void riscv_timer_init(RISCVCPU *cpu); #endif diff --git a/target/riscv/translate.c b/target/riscv/translate.c index d7a6de0..9ddef2d 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -1217,13 +1217,35 @@ const RISCVDecoder decoder_table[] = { const size_t decoder_table_size = ARRAY_SIZE(decoder_table); -static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t 
opcode) +static void decode_opc(CPURISCVState *env, DisasContext *ctx) { + uint32_t opcode; + bool pc_is_4byte_align = ((ctx->base.pc_next % 4) == 0); + ctx->virt_inst_excp = false; - ctx->cur_insn_len = insn_len(opcode); + if (pc_is_4byte_align) { + /* + * Load 4 bytes at once to make instruction fetch atomically. + * + * Note: When pc is 4-byte aligned, 4-byte instruction wouldn't be + * across pages. We could preload 4 bytes instruction no matter + * real one is 2 or 4 bytes. Instruction preload wouldn't trigger + * additional page fault. + */ + opcode = translator_ldl(env, &ctx->base, ctx->base.pc_next); + } else { + /* + * For unaligned pc, instruction preload may trigger additional + * page fault so we only load 2 bytes here. + */ + opcode = (uint32_t) translator_lduw(env, &ctx->base, ctx->base.pc_next); + } + ctx->ol = ctx->xl; + + ctx->cur_insn_len = insn_len((uint16_t)opcode); /* Check for compressed insn */ if (ctx->cur_insn_len == 2) { - ctx->opcode = opcode; + ctx->opcode = (uint16_t)opcode; /* * The Zca extension is added as way to refer to instructions in the C * extension that do not include the floating-point loads and stores @@ -1233,15 +1255,17 @@ static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode) return; } } else { - uint32_t opcode32 = opcode; - opcode32 = deposit32(opcode32, 16, 16, - translator_lduw(env, &ctx->base, - ctx->base.pc_next + 2)); - ctx->opcode = opcode32; + if (!pc_is_4byte_align) { + /* Load last 2 bytes of instruction here */ + opcode = deposit32(opcode, 16, 16, + translator_lduw(env, &ctx->base, + ctx->base.pc_next + 2)); + } + ctx->opcode = opcode; for (guint i = 0; i < ctx->decoders->len; ++i) { riscv_cpu_decode_fn func = g_ptr_array_index(ctx->decoders, i); - if (func(ctx, opcode32)) { + if (func(ctx, opcode)) { return; } } @@ -1319,10 +1343,8 @@ static void riscv_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) { DisasContext *ctx = container_of(dcbase, DisasContext, base); 
CPURISCVState *env = cpu_env(cpu); - uint16_t opcode16 = translator_lduw(env, &ctx->base, ctx->base.pc_next); - ctx->ol = ctx->xl; - decode_opc(env, ctx, opcode16); + decode_opc(env, ctx); ctx->base.pc_next += ctx->cur_insn_len; /* diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 5dc1c10..b41c29d 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -35,7 +35,7 @@ #include <math.h> target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, - target_ulong s2) + target_ulong s2, target_ulong x0) { int vlmax, vl; RISCVCPU *cpu = env_archcpu(env); @@ -83,6 +83,16 @@ target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, } else { vl = vlmax; } + + if (cpu->cfg.rvv_vsetvl_x0_vill && x0 && (env->vl != vl)) { + /* only set vill bit. */ + env->vill = 1; + env->vtype = 0; + env->vl = 0; + env->vstart = 0; + return 0; + } + env->vl = vl; env->vtype = s2; env->vstart = 0; diff --git a/tests/data/acpi/riscv64/virt/RHCT b/tests/data/acpi/riscv64/virt/RHCT Binary files differindex 13c8025..52a4cc4 100644 --- a/tests/data/acpi/riscv64/virt/RHCT +++ b/tests/data/acpi/riscv64/virt/RHCT diff --git a/tests/functional/aspeed.py b/tests/functional/aspeed.py index 7a40d5d..b131703 100644 --- a/tests/functional/aspeed.py +++ b/tests/functional/aspeed.py @@ -8,8 +8,13 @@ from qemu_test import LinuxKernelTest class AspeedTest(LinuxKernelTest): def do_test_arm_aspeed_openbmc(self, machine, image, uboot='2019.04', - cpu_id='0x0', soc='AST2500 rev A1'): - hostname = machine.removesuffix('-bmc') + cpu_id='0x0', soc='AST2500 rev A1', + image_hostname=None): + # Allow for the image hostname to not end in "-bmc" + if image_hostname is not None: + hostname = image_hostname + else: + hostname = machine.removesuffix('-bmc') self.set_machine(machine) self.vm.set_console() diff --git a/tests/functional/meson.build b/tests/functional/meson.build index b542b3a..050c900 100644 --- a/tests/functional/meson.build +++ 
b/tests/functional/meson.build @@ -32,6 +32,8 @@ test_timeouts = { 'arm_aspeed_ast2500' : 720, 'arm_aspeed_ast2600' : 1200, 'arm_aspeed_bletchley' : 480, + 'arm_aspeed_catalina' : 480, + 'arm_aspeed_gb200nvl_bmc' : 480, 'arm_aspeed_rainier' : 480, 'arm_bpim2u' : 500, 'arm_collie' : 180, @@ -127,6 +129,8 @@ tests_arm_system_thorough = [ 'arm_aspeed_ast2500', 'arm_aspeed_ast2600', 'arm_aspeed_bletchley', + 'arm_aspeed_catalina', + 'arm_aspeed_gb200nvl_bmc', 'arm_aspeed_rainier', 'arm_bpim2u', 'arm_canona1100', diff --git a/tests/functional/test_arm_aspeed_catalina.py b/tests/functional/test_arm_aspeed_catalina.py new file mode 100755 index 0000000..dc2f24e --- /dev/null +++ b/tests/functional/test_arm_aspeed_catalina.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python3 +# +# Functional test that boots the ASPEED machines +# +# SPDX-License-Identifier: GPL-2.0-or-later + +from qemu_test import Asset +from aspeed import AspeedTest + + +class CatalinaMachine(AspeedTest): + + ASSET_CATALINA_FLASH = Asset( + 'https://github.com/legoater/qemu-aspeed-boot/raw/a866feb5ef81245b4827a214584bf6bcc72939f6/images/catalina-bmc/obmc-phosphor-image-catalina-20250619123021.static.mtd.xz', + '287402e1ba021991e06be1d098f509444a02a3d81a73a932f66528b159e864f9') + + def test_arm_ast2600_catalina_openbmc(self): + image_path = self.uncompress(self.ASSET_CATALINA_FLASH) + + self.do_test_arm_aspeed_openbmc('catalina-bmc', image=image_path, + uboot='2019.04', cpu_id='0xf00', + soc='AST2600 rev A3') + +if __name__ == '__main__': + AspeedTest.main() diff --git a/tests/functional/test_arm_aspeed_gb200nvl_bmc.py b/tests/functional/test_arm_aspeed_gb200nvl_bmc.py new file mode 100644 index 0000000..8e8e3f0 --- /dev/null +++ b/tests/functional/test_arm_aspeed_gb200nvl_bmc.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python3 +# +# Functional test that boots the ASPEED machines +# +# SPDX-License-Identifier: GPL-2.0-or-later + +from qemu_test import Asset +from aspeed import AspeedTest + + +class 
GB200Machine(AspeedTest): + + ASSET_GB200_FLASH = Asset( + 'https://github.com/legoater/qemu-aspeed-boot/raw/refs/heads/master/images/gb200nvl-obmc/obmc-phosphor-image-gb200nvl-obmc-20250702182348.static.mtd.xz', + 'b84819317cb3dc762895ad507705978ef000bfc77c50c33a63bdd37921db0dbc') + + def test_arm_aspeed_gb200_openbmc(self): + image_path = self.uncompress(self.ASSET_GB200_FLASH) + + self.do_test_arm_aspeed_openbmc('gb200nvl-bmc', image=image_path, + uboot='2019.04', cpu_id='0xf00', + soc='AST2600 rev A3', + image_hostname='gb200nvl-obmc') + +if __name__ == '__main__': + AspeedTest.main() diff --git a/tests/qtest/aspeed_scu-test.c b/tests/qtest/aspeed_scu-test.c new file mode 100644 index 0000000..ca09f91 --- /dev/null +++ b/tests/qtest/aspeed_scu-test.c @@ -0,0 +1,231 @@ +/* + * QTest testcase for the ASPEED AST2500 and AST2600 SCU. + * + * SPDX-License-Identifier: GPL-2.0-or-later + * Copyright (C) 2025 Tan Siewert + */ + +#include "qemu/osdep.h" +#include "libqtest-single.h" + +/* + * SCU base, as well as protection key are + * the same on AST2500 and 2600. + */ +#define AST_SCU_BASE 0x1E6E2000 +#define AST_SCU_PROT_LOCK_STATE 0x0 +#define AST_SCU_PROT_LOCK_VALUE 0x2 +#define AST_SCU_PROT_UNLOCK_STATE 0x1 +#define AST_SCU_PROT_UNLOCK_VALUE 0x1688A8A8 + +#define AST2500_MACHINE "-machine ast2500-evb" +#define AST2500_SCU_PROT_REG 0x00 +#define AST2500_SCU_MISC_2_CONTROL_REG 0x4C + +#define AST2600_MACHINE "-machine ast2600-evb" +/* AST2600 has two protection registers */ +#define AST2600_SCU_PROT_REG 0x000 +#define AST2600_SCU_PROT_REG2 0x010 +#define AST2600_SCU_MISC_2_CONTROL_REG 0x0C4 + +#define TEST_LOCK_ARBITRARY_VALUE 0xABCDEFAB + +/** + * Assert that a given register matches an expected value. + * + * Reads the register and checks if its value equals the expected value. 
+ * + * @param *s - QTest machine state + * @param reg - Address of the register to be checked + * @param expected - Expected register value + */ +static inline void assert_register_eq(QTestState *s, + uint32_t reg, + uint32_t expected) +{ + uint32_t value = qtest_readl(s, reg); + g_assert_cmphex(value, ==, expected); +} + +/** + * Assert that a given register does not match a specific value. + * + * Reads the register and checks that its value is not equal to the + * provided value. + * + * @param *s - QTest machine state + * @param reg - Address of the register to be checked + * @param not_expected - Value the register must not contain + */ +static inline void assert_register_neq(QTestState *s, + uint32_t reg, + uint32_t not_expected) +{ + uint32_t value = qtest_readl(s, reg); + g_assert_cmphex(value, !=, not_expected); +} + +/** + * Test whether the SCU can be locked and unlocked correctly. + * + * When testing multiple registers, this function assumes that writing + * to the first register also affects the others. However, writing to + * any other register only affects itself. + * + * @param *machine - input machine configuration, passed directly + * to QTest + * @param regs[] - List of registers to be checked + * @param regc - amount of arguments for registers to be checked + */ +static void test_protection_register(const char *machine, + const uint32_t regs[], + const int regc) +{ + QTestState *s = qtest_init(machine); + + for (int i = 0; i < regc; i++) { + uint32_t reg = regs[i]; + + qtest_writel(s, reg, AST_SCU_PROT_UNLOCK_VALUE); + assert_register_eq(s, reg, AST_SCU_PROT_UNLOCK_STATE); + + /** + * Check that other registers are unlocked too, if more + * than one is available. 
+ */ + if (regc > 1 && i == 0) { + /* Initialise at 1 instead of 0 to skip first */ + for (int j = 1; j < regc; j++) { + uint32_t add_reg = regs[j]; + assert_register_eq(s, add_reg, AST_SCU_PROT_UNLOCK_STATE); + } + } + + /* Lock the register again */ + qtest_writel(s, reg, AST_SCU_PROT_LOCK_VALUE); + assert_register_eq(s, reg, AST_SCU_PROT_LOCK_STATE); + + /* And the same for locked state */ + if (regc > 1 && i == 0) { + /* Initialise at 1 instead of 0 to skip first */ + for (int j = 1; j < regc; j++) { + uint32_t add_reg = regs[j]; + assert_register_eq(s, add_reg, AST_SCU_PROT_LOCK_STATE); + } + } + } + + qtest_quit(s); +} + +static void test_2500_protection_register(void) +{ + uint32_t regs[] = { AST_SCU_BASE + AST2500_SCU_PROT_REG }; + + test_protection_register(AST2500_MACHINE, + regs, + ARRAY_SIZE(regs)); +} + +static void test_2600_protection_register(void) +{ + /** + * The AST2600 has two protection registers, both + * being required to be unlocked to do any operation. + * + * Modifying SCU000 also modifies SCU010, but modifying + * SCU010 only will keep SCU000 untouched. + */ + uint32_t regs[] = { AST_SCU_BASE + AST2600_SCU_PROT_REG, + AST_SCU_BASE + AST2600_SCU_PROT_REG2 }; + + test_protection_register(AST2600_MACHINE, + regs, + ARRAY_SIZE(regs)); +} + +/** + * Test if SCU register writes are correctly allowed or blocked + * depending on the protection register state. + * + * The test first locks the protection register and verifies that + * writes to the target SCU register are rejected. It then unlocks + * the protection register and confirms that the written value is + * retained when unlocked. 
+ * + * @param *machine - input machine configuration, passed directly + * to QTest + * @param protection_register - first SCU protection key register + * (only one for keeping it simple) + * @param test_register - Register to be used for writing arbitrary + * values + */ +static void test_write_permission_lock_state(const char *machine, + const uint32_t protection_register, + const uint32_t test_register) +{ + QTestState *s = qtest_init(machine); + + /* Arbitrary value to lock provided SCU protection register */ + qtest_writel(s, protection_register, AST_SCU_PROT_LOCK_VALUE); + + /* Ensure that the SCU is really locked */ + assert_register_eq(s, protection_register, AST_SCU_PROT_LOCK_STATE); + + /* Write a known arbitrary value to test that the write is blocked */ + qtest_writel(s, test_register, TEST_LOCK_ARBITRARY_VALUE); + + /* We do not want to have the written value to be saved */ + assert_register_neq(s, test_register, TEST_LOCK_ARBITRARY_VALUE); + + /** + * Unlock the SCU and verify that it can be written to. + * Assumes that the first SCU protection register is sufficient to + * unlock all protection registers, if multiple are present. 
+ */ + qtest_writel(s, protection_register, AST_SCU_PROT_UNLOCK_VALUE); + assert_register_eq(s, protection_register, AST_SCU_PROT_UNLOCK_STATE); + + /* Write a known arbitrary value to test that the write works */ + qtest_writel(s, test_register, TEST_LOCK_ARBITRARY_VALUE); + + /* Ensure that the written value is retained */ + assert_register_eq(s, test_register, TEST_LOCK_ARBITRARY_VALUE); + + qtest_quit(s); +} + +static void test_2500_write_permission_lock_state(void) +{ + test_write_permission_lock_state( + AST2500_MACHINE, + AST_SCU_BASE + AST2500_SCU_PROT_REG, + AST_SCU_BASE + AST2500_SCU_MISC_2_CONTROL_REG + ); +} + +static void test_2600_write_permission_lock_state(void) +{ + test_write_permission_lock_state( + AST2600_MACHINE, + AST_SCU_BASE + AST2600_SCU_PROT_REG, + AST_SCU_BASE + AST2600_SCU_MISC_2_CONTROL_REG + ); +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + + qtest_add_func("/ast2500/scu/protection_register", + test_2500_protection_register); + qtest_add_func("/ast2600/scu/protection_register", + test_2600_protection_register); + + qtest_add_func("/ast2500/scu/write_permission_lock_state", + test_2500_write_permission_lock_state); + qtest_add_func("/ast2600/scu/write_permission_lock_state", + test_2600_write_permission_lock_state); + + return g_test_run(); +} diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build index 8ad8490..91b4a71 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build @@ -215,6 +215,7 @@ qtests_npcm8xx = \ qtests_aspeed = \ ['aspeed_gpio-test', 'aspeed_hace-test', + 'aspeed_scu-test', 'aspeed_smc-test'] qtests_aspeed64 = \ ['ast2700-gpio-test', diff --git a/tests/qtest/qmp-cmd-test.c b/tests/qtest/qmp-cmd-test.c index 040d042..cf71876 100644 --- a/tests/qtest/qmp-cmd-test.c +++ b/tests/qtest/qmp-cmd-test.c @@ -51,7 +51,6 @@ static int query_error_class(const char *cmd) { "x-query-usb", ERROR_CLASS_GENERIC_ERROR }, /* Only valid with accel=tcg */ { "x-query-jit", 
ERROR_CLASS_GENERIC_ERROR }, - { "x-query-opcount", ERROR_CLASS_GENERIC_ERROR }, { "xen-event-list", ERROR_CLASS_GENERIC_ERROR }, { NULL, -1 } }; diff --git a/tests/tcg/riscv64/Makefile.softmmu-target b/tests/tcg/riscv64/Makefile.softmmu-target index 7c1d44d..3ca5953 100644 --- a/tests/tcg/riscv64/Makefile.softmmu-target +++ b/tests/tcg/riscv64/Makefile.softmmu-target @@ -20,5 +20,9 @@ EXTRA_RUNS += run-issue1060 run-issue1060: issue1060 $(call run-test, $<, $(QEMU) $(QEMU_OPTS)$<) +EXTRA_RUNS += run-test-mepc-masking +run-test-mepc-masking: test-mepc-masking + $(call run-test, $<, $(QEMU) $(QEMU_OPTS)$<) + # We don't currently support the multiarch system tests undefine MULTIARCH_TESTS diff --git a/tests/tcg/riscv64/test-mepc-masking.S b/tests/tcg/riscv64/test-mepc-masking.S new file mode 100644 index 0000000..fccd2a7 --- /dev/null +++ b/tests/tcg/riscv64/test-mepc-masking.S @@ -0,0 +1,73 @@ +/* + * Test for MEPC masking bug fix + * + * This test verifies that MEPC properly masks the lower bits according + * to the RISC-V specification when vectored mode bits from STVEC are + * written to MEPC. 
+ */ + + .option norvc + + .text + .global _start +_start: + /* Set up machine trap vector */ + lla t0, machine_trap_handler + csrw mtvec, t0 + + /* Set STVEC with vectored mode (mode bits = 01) */ + li t0, 0x80004001 + csrw stvec, t0 + + /* Clear medeleg to handle exceptions in M-mode */ + csrw medeleg, zero + + /* Trigger illegal instruction exception */ + .word 0xffffffff + +test_completed: + /* Exit with result in a0 */ + /* a0 = 0: success (bits [1:0] were masked) */ + /* a0 != 0: failure (some bits were not masked) */ + j _exit + +machine_trap_handler: + /* Check if illegal instruction (mcause = 2) */ + csrr t0, mcause + li t1, 2 + bne t0, t1, skip_test + + /* Test: Copy STVEC (with mode bits) to MEPC */ + csrr t0, stvec /* t0 = 0x80004001 */ + csrw mepc, t0 /* Write to MEPC */ + csrr t1, mepc /* Read back MEPC */ + + /* Check if bits [1:0] are masked (IALIGN=32 without RVC) */ + andi a0, t1, 3 /* a0 = 0 if both bits masked correctly */ + + /* Set correct return address */ + lla t0, test_completed + csrw mepc, t0 + +skip_test: + mret + +/* Exit with semihosting */ +_exit: + lla a1, semiargs + li t0, 0x20026 /* ADP_Stopped_ApplicationExit */ + sd t0, 0(a1) + sd a0, 8(a1) + li a0, 0x20 /* TARGET_SYS_EXIT_EXTENDED */ + + /* Semihosting call sequence */ + .balign 16 + slli zero, zero, 0x1f + ebreak + srai zero, zero, 0x7 + j . + + .data + .balign 8 +semiargs: + .space 16 |