diff options
79 files changed, 2371 insertions, 680 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index cef54de..19f67dc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2198,6 +2198,8 @@ M: Zhenzhong Duan <zhenzhong.duan@intel.com> S: Supported F: backends/iommufd.c F: include/sysemu/iommufd.h +F: backends/host_iommu_device.c +F: include/sysemu/host_iommu_device.h F: include/qemu/chardev_open.h F: util/chardev_open.c F: docs/devel/vfio-iommufd.rst @@ -3312,6 +3314,7 @@ F: tests/qtest/ F: docs/devel/qgraph.rst F: docs/devel/qtest.rst X: tests/qtest/bios-tables-test* +X: tests/qtest/migration-* Device Fuzzing M: Alexander Bulekov <alxndr@bu.edu> @@ -3408,7 +3411,7 @@ F: include/qemu/userfaultfd.h F: migration/ F: scripts/vmstate-static-checker.py F: tests/vmstate-static-checker-data/ -F: tests/qtest/migration-test.c +F: tests/qtest/migration-* F: docs/devel/migration/ F: qapi/migration.json F: tests/migration/ diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c index b2a37a2..ac08cfb 100644 --- a/accel/hvf/hvf-accel-ops.c +++ b/accel/hvf/hvf-accel-ops.c @@ -52,7 +52,7 @@ #include "qemu/main-loop.h" #include "exec/address-spaces.h" #include "exec/exec-all.h" -#include "exec/gdbstub.h" +#include "gdbstub/enums.h" #include "sysemu/cpus.h" #include "sysemu/hvf.h" #include "sysemu/hvf_int.h" diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 854cb86..2b4ab89 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -27,7 +27,7 @@ #include "hw/pci/msi.h" #include "hw/pci/msix.h" #include "hw/s390x/adapter.h" -#include "exec/gdbstub.h" +#include "gdbstub/enums.h" #include "sysemu/kvm_int.h" #include "sysemu/runstate.h" #include "sysemu/cpus.h" diff --git a/accel/qtest/qtest.c b/accel/qtest/qtest.c index f6056ac..bf14032 100644 --- a/accel/qtest/qtest.c +++ b/accel/qtest/qtest.c @@ -24,6 +24,18 @@ #include "qemu/main-loop.h" #include "hw/core/cpu.h" +static int64_t qtest_clock_counter; + +static int64_t qtest_get_virtual_clock(void) +{ + return qatomic_read_i64(&qtest_clock_counter); +} + +static void qtest_set_virtual_clock(int64_t count) +{ + qatomic_set_i64(&qtest_clock_counter, count); +} + static int qtest_init_accel(MachineState *ms) { return 0; @@ -52,6 +64,7 @@ static void qtest_accel_ops_class_init(ObjectClass *oc, void *data) ops->create_vcpu_thread = dummy_start_vcpu_thread; ops->get_virtual_clock = qtest_get_virtual_clock; + ops->set_virtual_clock = qtest_set_virtual_clock; }; static const TypeInfo qtest_accel_ops_type = { diff --git a/accel/tcg/ldst_common.c.inc b/accel/tcg/ldst_common.c.inc index c82048e..87ceb95 100644 --- a/accel/tcg/ldst_common.c.inc +++ b/accel/tcg/ldst_common.c.inc @@ -125,7 +125,9 @@ void helper_st_i128(CPUArchState *env, uint64_t addr, Int128 val, MemOpIdx oi) static void plugin_load_cb(CPUArchState *env, abi_ptr addr, MemOpIdx oi) { - qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R); + if (cpu_plugin_mem_cbs_enabled(env_cpu(env))) { + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R); + } } uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra) @@ -188,7 +190,9 @@ Int128 cpu_ld16_mmu(CPUArchState *env, abi_ptr addr, static void plugin_store_cb(CPUArchState *env, abi_ptr addr, MemOpIdx oi) { - qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); + if (cpu_plugin_mem_cbs_enabled(env_cpu(env))) { + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); + } } void cpu_stb_mmu(CPUArchState *env, abi_ptr addr, uint8_t val, diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c index cc1634e..b6bae32 100644 --- a/accel/tcg/plugin-gen.c +++ b/accel/tcg/plugin-gen.c @@ -240,13 +240,13 @@ static void inject_mem_cb(struct qemu_plugin_dyn_cb *cb, { switch (cb->type) { case PLUGIN_CB_MEM_REGULAR: - if (rw && cb->regular.rw) { + if (rw & cb->regular.rw) { gen_mem_cb(&cb->regular, meminfo, addr); } break; case PLUGIN_CB_INLINE_ADD_U64: case PLUGIN_CB_INLINE_STORE_U64: - if (rw && cb->inline_insn.rw) { + if (rw & cb->inline_insn.rw) { inject_cb(cb); } break; diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c index 1433e38..3c19e68 100644 --- a/accel/tcg/tcg-accel-ops.c +++ b/accel/tcg/tcg-accel-ops.c @@ -35,7 +35,7 @@ #include "exec/exec-all.h" #include "exec/hwaddr.h" #include "exec/tb-flush.h" -#include "exec/gdbstub.h" +#include "gdbstub/enums.h" #include "hw/core/cpu.h" diff --git a/backends/host_iommu_device.c b/backends/host_iommu_device.c new file mode 100644 index 0000000..8f2dda1 --- /dev/null +++ b/backends/host_iommu_device.c @@ -0,0 +1,33 @@ +/* + * Host IOMMU device abstract + * + * Copyright (C) 2024 Intel Corporation. + * + * Authors: Zhenzhong Duan <zhenzhong.duan@intel.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "sysemu/host_iommu_device.h" + +OBJECT_DEFINE_ABSTRACT_TYPE(HostIOMMUDevice, + host_iommu_device, + HOST_IOMMU_DEVICE, + OBJECT) + +static void host_iommu_device_class_init(ObjectClass *oc, void *data) +{ +} + +static void host_iommu_device_init(Object *obj) +{ +} + +static void host_iommu_device_finalize(Object *obj) +{ + HostIOMMUDevice *hiod = HOST_IOMMU_DEVICE(obj); + + g_free(hiod->name); +} diff --git a/backends/iommufd.c b/backends/iommufd.c index c506afb..84fefbc 100644 --- a/backends/iommufd.c +++ b/backends/iommufd.c @@ -208,23 +208,69 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, return ret; } -static const TypeInfo iommufd_backend_info = { - .name = TYPE_IOMMUFD_BACKEND, - .parent = TYPE_OBJECT, - .instance_size = sizeof(IOMMUFDBackend), - .instance_init = iommufd_backend_init, - .instance_finalize = iommufd_backend_finalize, - .class_size = sizeof(IOMMUFDBackendClass), - .class_init = iommufd_backend_class_init, - .interfaces = (InterfaceInfo[]) { - { TYPE_USER_CREATABLE }, - { } +bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid, + uint32_t *type, void *data, uint32_t len, + Error **errp) +{ + struct iommu_hw_info info = { + .size = sizeof(info), + .dev_id = devid, + .data_len = len, + .data_uptr = (uintptr_t)data, + }; + + if (ioctl(be->fd, IOMMU_GET_HW_INFO, &info)) { + error_setg_errno(errp, errno, "Failed to get hardware info"); + return false; } -}; -static void register_types(void) + g_assert(type); + *type = info.out_data_type; + + return true; +} + +static int hiod_iommufd_get_cap(HostIOMMUDevice *hiod, int cap, Error **errp) { - type_register_static(&iommufd_backend_info); + HostIOMMUDeviceCaps *caps = &hiod->caps; + + switch (cap) { + case HOST_IOMMU_DEVICE_CAP_IOMMU_TYPE: + return caps->type; + case HOST_IOMMU_DEVICE_CAP_AW_BITS: + return caps->aw_bits; + default: + error_setg(errp, "%s: unsupported capability %x", hiod->name, cap); + return -EINVAL; + } } -type_init(register_types); +static void hiod_iommufd_class_init(ObjectClass *oc, void *data) +{ + HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc); + + hioc->get_cap = hiod_iommufd_get_cap; +}; + +static const TypeInfo types[] = { + { + .name = TYPE_IOMMUFD_BACKEND, + .parent = TYPE_OBJECT, + .instance_size = sizeof(IOMMUFDBackend), + .instance_init = iommufd_backend_init, + .instance_finalize = iommufd_backend_finalize, + .class_size = sizeof(IOMMUFDBackendClass), + .class_init = iommufd_backend_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } + }, { + .name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD, + .parent = TYPE_HOST_IOMMU_DEVICE, + .class_init = hiod_iommufd_class_init, + .abstract = true, + } +}; + +DEFINE_TYPES(types) diff --git a/backends/meson.build b/backends/meson.build index 8b2b111..106312f 100644 --- a/backends/meson.build +++ b/backends/meson.build @@ -16,6 +16,7 @@ if host_os != 'windows' endif if host_os == 'linux' system_ss.add(files('hostmem-memfd.c')) + system_ss.add(files('host_iommu_device.c')) endif if keyutils.found() system_ss.add(keyutils, files('cryptodev-lkcf.c')) diff --git a/configs/targets/riscv64-softmmu.mak b/configs/targets/riscv64-softmmu.mak index f688ffa..917980e 100644 --- a/configs/targets/riscv64-softmmu.mak +++ b/configs/targets/riscv64-softmmu.mak @@ -1,6 +1,7 @@ TARGET_ARCH=riscv64 TARGET_BASE_ARCH=riscv TARGET_SUPPORTS_MTTCG=y +TARGET_KVM_HAVE_GUEST_DEBUG=y TARGET_XML_FILES= gdb-xml/riscv-64bit-cpu.xml gdb-xml/riscv-32bit-fpu.xml gdb-xml/riscv-64bit-fpu.xml gdb-xml/riscv-64bit-virtual.xml # needed by boot.c TARGET_NEED_FDT=y diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile index 0b64d2c..449ead1 100644 --- a/contrib/plugins/Makefile +++ b/contrib/plugins/Makefile @@ -27,6 +27,7 @@ endif NAMES += hwprofile NAMES += cache NAMES += drcov +NAMES += ips ifeq ($(CONFIG_WIN32),y) SO_SUFFIX := .dll diff --git a/contrib/plugins/ips.c b/contrib/plugins/ips.c new file mode 100644 index 0000000..29fa556 --- /dev/null +++ b/contrib/plugins/ips.c @@ -0,0 +1,164 @@ +/* + * Instructions Per Second (IPS) rate limiting plugin. + * + * This plugin can be used to restrict the execution of a system to a + * particular number of Instructions Per Second (IPS). This controls + * time as seen by the guest so while wall-clock time may be longer + * from the guests point of view time will pass at the normal rate. + * + * This uses the new plugin API which allows the plugin to control + * system time. + * + * Copyright (c) 2023 Linaro Ltd + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include <stdio.h> +#include <glib.h> +#include <qemu-plugin.h> + +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; + +/* how many times do we update time per sec */ +#define NUM_TIME_UPDATE_PER_SEC 10 +#define NSEC_IN_ONE_SEC (1000 * 1000 * 1000) + +static GMutex global_state_lock; + +static uint64_t max_insn_per_second = 1000 * 1000 * 1000; /* ips per core, per second */ +static uint64_t max_insn_per_quantum; /* trap every N instructions */ +static int64_t virtual_time_ns; /* last set virtual time */ + +static const void *time_handle; + +typedef struct { + uint64_t total_insn; + uint64_t quantum_insn; /* insn in last quantum */ + int64_t last_quantum_time; /* time when last quantum started */ +} vCPUTime; + +struct qemu_plugin_scoreboard *vcpus; + +/* return epoch time in ns */ +static int64_t now_ns(void) +{ + return g_get_real_time() * 1000; +} + +static uint64_t num_insn_during(int64_t elapsed_ns) +{ + double num_secs = elapsed_ns / (double) NSEC_IN_ONE_SEC; + return num_secs * (double) max_insn_per_second; +} + +static int64_t time_for_insn(uint64_t num_insn) +{ + double num_secs = (double) num_insn / (double) max_insn_per_second; + return num_secs * (double) NSEC_IN_ONE_SEC; +} + +static void update_system_time(vCPUTime *vcpu) +{ + int64_t elapsed_ns = now_ns() - vcpu->last_quantum_time; + uint64_t max_insn = num_insn_during(elapsed_ns); + + if (vcpu->quantum_insn >= max_insn) { + /* this vcpu ran faster than expected, so it has to sleep */ + uint64_t insn_advance = vcpu->quantum_insn - max_insn; + uint64_t time_advance_ns = time_for_insn(insn_advance); + int64_t sleep_us = time_advance_ns / 1000; + g_usleep(sleep_us); + } + + vcpu->total_insn += vcpu->quantum_insn; + vcpu->quantum_insn = 0; + vcpu->last_quantum_time = now_ns(); + + /* based on total number of instructions, what should be the new time? */ + int64_t new_virtual_time = time_for_insn(vcpu->total_insn); + + g_mutex_lock(&global_state_lock); + + /* Time only moves forward. Another vcpu might have updated it already. */ + if (new_virtual_time > virtual_time_ns) { + qemu_plugin_update_ns(time_handle, new_virtual_time); + virtual_time_ns = new_virtual_time; + } + + g_mutex_unlock(&global_state_lock); +} + +static void vcpu_init(qemu_plugin_id_t id, unsigned int cpu_index) +{ + vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index); + vcpu->total_insn = 0; + vcpu->quantum_insn = 0; + vcpu->last_quantum_time = now_ns(); +} + +static void vcpu_exit(qemu_plugin_id_t id, unsigned int cpu_index) +{ + vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index); + update_system_time(vcpu); +} + +static void every_quantum_insn(unsigned int cpu_index, void *udata) +{ + vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index); + g_assert(vcpu->quantum_insn >= max_insn_per_quantum); + update_system_time(vcpu); +} + +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) +{ + size_t n_insns = qemu_plugin_tb_n_insns(tb); + qemu_plugin_u64 quantum_insn = + qemu_plugin_scoreboard_u64_in_struct(vcpus, vCPUTime, quantum_insn); + /* count (and eventually trap) once per tb */ + qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu( + tb, QEMU_PLUGIN_INLINE_ADD_U64, quantum_insn, n_insns); + qemu_plugin_register_vcpu_tb_exec_cond_cb( + tb, every_quantum_insn, + QEMU_PLUGIN_CB_NO_REGS, QEMU_PLUGIN_COND_GE, + quantum_insn, max_insn_per_quantum, NULL); +} + +static void plugin_exit(qemu_plugin_id_t id, void *udata) +{ + qemu_plugin_scoreboard_free(vcpus); +} + +QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, + const qemu_info_t *info, int argc, + char **argv) +{ + for (int i = 0; i < argc; i++) { + char *opt = argv[i]; + g_auto(GStrv) tokens = g_strsplit(opt, "=", 2); + if (g_strcmp0(tokens[0], "ips") == 0) { + max_insn_per_second = g_ascii_strtoull(tokens[1], NULL, 10); + if (!max_insn_per_second && errno) { + fprintf(stderr, "%s: couldn't parse %s (%s)\n", + __func__, tokens[1], g_strerror(errno)); + return -1; + } + } else { + fprintf(stderr, "option parsing failed: %s\n", opt); + return -1; + } + } + + vcpus = qemu_plugin_scoreboard_new(sizeof(vCPUTime)); + max_insn_per_quantum = max_insn_per_second / NUM_TIME_UPDATE_PER_SEC; + + time_handle = qemu_plugin_request_time_control(); + g_assert(time_handle); + + qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans); + qemu_plugin_register_vcpu_init_cb(id, vcpu_init); + qemu_plugin_register_vcpu_exit_cb(id, vcpu_exit); + qemu_plugin_register_atexit_cb(id, plugin_exit, NULL); + + return 0; +} diff --git a/gdbstub/user.c b/gdbstub/user.c index edeb72e..e34b58b 100644 --- a/gdbstub/user.c +++ b/gdbstub/user.c @@ -18,6 +18,7 @@ #include "exec/gdbstub.h" #include "gdbstub/syscalls.h" #include "gdbstub/user.h" +#include "gdbstub/enums.h" #include "hw/core/cpu.h" #include "trace.h" #include "internals.h" diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index c4350e0..37c21a0a 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -61,6 +61,12 @@ struct vtd_as_key { uint32_t pasid; }; +/* bus/devfn is PCI device's real BDF not the aliased one */ +struct vtd_hiod_key { + PCIBus *bus; + uint8_t devfn; +}; + struct vtd_iotlb_key { uint64_t gfn; uint32_t pasid; @@ -250,6 +256,25 @@ static guint vtd_as_hash(gconstpointer v) return (guint)(value << 8 | key->devfn); } +/* Same implementation as vtd_as_hash() */ +static guint vtd_hiod_hash(gconstpointer v) +{ + return vtd_as_hash(v); +} + +static gboolean vtd_hiod_equal(gconstpointer v1, gconstpointer v2) +{ + const struct vtd_hiod_key *key1 = v1; + const struct vtd_hiod_key *key2 = v2; + + return (key1->bus == key2->bus) && (key1->devfn == key2->devfn); +} + +static void vtd_hiod_destroy(gpointer v) +{ + object_unref(v); +} + static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value, gpointer user_data) { @@ -3812,6 +3837,87 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, return vtd_dev_as; } +static bool vtd_check_hiod(IntelIOMMUState *s, HostIOMMUDevice *hiod, + Error **errp) +{ + HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod); + int ret; + + if (!hiodc->get_cap) { + error_setg(errp, ".get_cap() not implemented"); + return false; + } + + /* Common checks */ + ret = hiodc->get_cap(hiod, HOST_IOMMU_DEVICE_CAP_AW_BITS, errp); + if (ret < 0) { + return false; + } + if (s->aw_bits > ret) { + error_setg(errp, "aw-bits %d > host aw-bits %d", s->aw_bits, ret); + return false; + } + + return true; +} + +static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn, + HostIOMMUDevice *hiod, Error **errp) +{ + IntelIOMMUState *s = opaque; + struct vtd_as_key key = { + .bus = bus, + .devfn = devfn, + }; + struct vtd_as_key *new_key; + + assert(hiod); + + vtd_iommu_lock(s); + + if (g_hash_table_lookup(s->vtd_host_iommu_dev, &key)) { + error_setg(errp, "Host IOMMU device already exist"); + vtd_iommu_unlock(s); + return false; + } + + if (!vtd_check_hiod(s, hiod, errp)) { + vtd_iommu_unlock(s); + return false; + } + + new_key = g_malloc(sizeof(*new_key)); + new_key->bus = bus; + new_key->devfn = devfn; + + object_ref(hiod); + g_hash_table_insert(s->vtd_host_iommu_dev, new_key, hiod); + + vtd_iommu_unlock(s); + + return true; +} + +static void vtd_dev_unset_iommu_device(PCIBus *bus, void *opaque, int devfn) +{ + IntelIOMMUState *s = opaque; + struct vtd_as_key key = { + .bus = bus, + .devfn = devfn, + }; + + vtd_iommu_lock(s); + + if (!g_hash_table_lookup(s->vtd_host_iommu_dev, &key)) { + vtd_iommu_unlock(s); + return; + } + + g_hash_table_remove(s->vtd_host_iommu_dev, &key); + + vtd_iommu_unlock(s); +} + /* Unmap the whole range in the notifier's scope. */ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) { @@ -3934,30 +4040,10 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n) return; } -/* Do the initialization. It will also be called when reset, so pay - * attention when adding new initialization stuff. - */ -static void vtd_init(IntelIOMMUState *s) +static void vtd_cap_init(IntelIOMMUState *s) { X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); - memset(s->csr, 0, DMAR_REG_SIZE); - memset(s->wmask, 0, DMAR_REG_SIZE); - memset(s->w1cmask, 0, DMAR_REG_SIZE); - memset(s->womask, 0, DMAR_REG_SIZE); - - s->root = 0; - s->root_scalable = false; - s->dmar_enabled = false; - s->intr_enabled = false; - s->iq_head = 0; - s->iq_tail = 0; - s->iq = 0; - s->iq_size = 0; - s->qi_enabled = false; - s->iq_last_desc_type = VTD_INV_DESC_NONE; - s->iq_dw = false; - s->next_frcd_reg = 0; s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS | VTD_CAP_MGAW(s->aw_bits); @@ -3974,27 +4060,6 @@ static void vtd_init(IntelIOMMUState *s) } s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO; - /* - * Rsvd field masks for spte - */ - vtd_spte_rsvd[0] = ~0ULL; - vtd_spte_rsvd[1] = VTD_SPTE_PAGE_L1_RSVD_MASK(s->aw_bits, - x86_iommu->dt_supported); - vtd_spte_rsvd[2] = VTD_SPTE_PAGE_L2_RSVD_MASK(s->aw_bits); - vtd_spte_rsvd[3] = VTD_SPTE_PAGE_L3_RSVD_MASK(s->aw_bits); - vtd_spte_rsvd[4] = VTD_SPTE_PAGE_L4_RSVD_MASK(s->aw_bits); - - vtd_spte_rsvd_large[2] = VTD_SPTE_LPAGE_L2_RSVD_MASK(s->aw_bits, - x86_iommu->dt_supported); - vtd_spte_rsvd_large[3] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits, - x86_iommu->dt_supported); - - if (s->scalable_mode || s->snoop_control) { - vtd_spte_rsvd[1] &= ~VTD_SPTE_SNP; - vtd_spte_rsvd_large[2] &= ~VTD_SPTE_SNP; - vtd_spte_rsvd_large[3] &= ~VTD_SPTE_SNP; - } - if (x86_iommu_ir_supported(x86_iommu)) { s->ecap |= VTD_ECAP_IR | VTD_ECAP_MHMV; if (s->intr_eim == ON_OFF_AUTO_ON) { @@ -4027,6 +4092,56 @@ static void vtd_init(IntelIOMMUState *s) if (s->pasid) { s->ecap |= VTD_ECAP_PASID; } +} + +/* + * Do the initialization. It will also be called when reset, so pay + * attention when adding new initialization stuff. + */ +static void vtd_init(IntelIOMMUState *s) +{ + X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); + + memset(s->csr, 0, DMAR_REG_SIZE); + memset(s->wmask, 0, DMAR_REG_SIZE); + memset(s->w1cmask, 0, DMAR_REG_SIZE); + memset(s->womask, 0, DMAR_REG_SIZE); + + s->root = 0; + s->root_scalable = false; + s->dmar_enabled = false; + s->intr_enabled = false; + s->iq_head = 0; + s->iq_tail = 0; + s->iq = 0; + s->iq_size = 0; + s->qi_enabled = false; + s->iq_last_desc_type = VTD_INV_DESC_NONE; + s->iq_dw = false; + s->next_frcd_reg = 0; + + vtd_cap_init(s); + + /* + * Rsvd field masks for spte + */ + vtd_spte_rsvd[0] = ~0ULL; + vtd_spte_rsvd[1] = VTD_SPTE_PAGE_L1_RSVD_MASK(s->aw_bits, + x86_iommu->dt_supported); + vtd_spte_rsvd[2] = VTD_SPTE_PAGE_L2_RSVD_MASK(s->aw_bits); + vtd_spte_rsvd[3] = VTD_SPTE_PAGE_L3_RSVD_MASK(s->aw_bits); + vtd_spte_rsvd[4] = VTD_SPTE_PAGE_L4_RSVD_MASK(s->aw_bits); + + vtd_spte_rsvd_large[2] = VTD_SPTE_LPAGE_L2_RSVD_MASK(s->aw_bits, + x86_iommu->dt_supported); + vtd_spte_rsvd_large[3] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits, + x86_iommu->dt_supported); + + if (s->scalable_mode || s->snoop_control) { + vtd_spte_rsvd[1] &= ~VTD_SPTE_SNP; + vtd_spte_rsvd_large[2] &= ~VTD_SPTE_SNP; + vtd_spte_rsvd_large[3] &= ~VTD_SPTE_SNP; + } vtd_reset_caches(s); @@ -4107,6 +4222,8 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) static PCIIOMMUOps vtd_iommu_ops = { .get_address_space = vtd_host_dma_iommu, + .set_iommu_device = vtd_dev_set_iommu_device, + .unset_iommu_device = vtd_dev_unset_iommu_device, }; static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) @@ -4226,6 +4343,8 @@ static void vtd_realize(DeviceState *dev, Error **errp) g_free, g_free); s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, vtd_as_equal, g_free, g_free); + s->vtd_host_iommu_dev = g_hash_table_new_full(vtd_hiod_hash, vtd_hiod_equal, + g_free, vtd_hiod_destroy); vtd_init(s); pci_setup_iommu(bus, &vtd_iommu_ops, dev); /* Pseudo address space under root PCI bus. */ diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 324c130..50b86d5 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -2648,11 +2648,27 @@ static void pci_device_class_base_init(ObjectClass *klass, void *data) } } -AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) +/* + * Get IOMMU root bus, aliased bus and devfn of a PCI device + * + * IOMMU root bus is needed by all call sites to call into iommu_ops. + * For call sites which don't need aliased BDF, passing NULL to + * aliased_[bus|devfn] is allowed. + * + * @piommu_bus: return root #PCIBus backed by an IOMMU for the PCI device. + * + * @aliased_bus: return aliased #PCIBus of the PCI device, optional. + * + * @aliased_devfn: return aliased devfn of the PCI device, optional. + */ +static void pci_device_get_iommu_bus_devfn(PCIDevice *dev, + PCIBus **piommu_bus, + PCIBus **aliased_bus, + int *aliased_devfn) { PCIBus *bus = pci_get_bus(dev); PCIBus *iommu_bus = bus; - uint8_t devfn = dev->devfn; + int devfn = dev->devfn; while (iommu_bus && !iommu_bus->iommu_ops && iommu_bus->parent_dev) { PCIBus *parent_bus = pci_get_bus(iommu_bus->parent_dev); @@ -2693,13 +2709,70 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) iommu_bus = parent_bus; } - if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) { + + assert(0 <= devfn && devfn < PCI_DEVFN_MAX); + assert(iommu_bus); + + if (pci_bus_bypass_iommu(bus) || !iommu_bus->iommu_ops) { + iommu_bus = NULL; + } + + *piommu_bus = iommu_bus; + + if (aliased_bus) { + *aliased_bus = bus; + } + + if (aliased_devfn) { + *aliased_devfn = devfn; + } +} + +AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + pci_device_get_iommu_bus_devfn(dev, &iommu_bus, &bus, &devfn); + if (iommu_bus) { return iommu_bus->iommu_ops->get_address_space(bus, iommu_bus->iommu_opaque, devfn); } return &address_space_memory; } +bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod, + Error **errp) +{ + PCIBus *iommu_bus, *aliased_bus; + int aliased_devfn; + + /* set_iommu_device requires device's direct BDF instead of aliased BDF */ + pci_device_get_iommu_bus_devfn(dev, &iommu_bus, + &aliased_bus, &aliased_devfn); + if (iommu_bus && iommu_bus->iommu_ops->set_iommu_device) { + hiod->aliased_bus = aliased_bus; + hiod->aliased_devfn = aliased_devfn; + return iommu_bus->iommu_ops->set_iommu_device(pci_get_bus(dev), + iommu_bus->iommu_opaque, + dev->devfn, hiod, errp); + } + return true; +} + +void pci_device_unset_iommu_device(PCIDevice *dev) +{ + PCIBus *iommu_bus; + + pci_device_get_iommu_bus_devfn(dev, &iommu_bus, NULL, NULL); + if (iommu_bus && iommu_bus->iommu_ops->unset_iommu_device) { + return iommu_bus->iommu_ops->unset_iommu_device(pci_get_bus(dev), + iommu_bus->iommu_opaque, + dev->devfn); + } +} + void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque) { /* diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index 5676d66..bc0893e 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -515,6 +515,9 @@ static void create_fdt_one_imsic(RISCVVirtState *s, hwaddr base_addr, uint32_t imsic_max_hart_per_socket, imsic_addr, imsic_size; g_autofree uint32_t *imsic_cells = NULL; g_autofree uint32_t *imsic_regs = NULL; + static const char * const imsic_compat[2] = { + "qemu,imsics", "riscv,imsics" + }; imsic_cells = g_new0(uint32_t, ms->smp.cpus * 2); imsic_regs = g_new0(uint32_t, socket_count * 4); @@ -538,13 +541,18 @@ static void create_fdt_one_imsic(RISCVVirtState *s, hwaddr base_addr, } } - imsic_name = g_strdup_printf("/soc/imsics@%lx", (unsigned long)base_addr); + imsic_name = g_strdup_printf("/soc/interrupt-controller@%lx", + (unsigned long)base_addr); qemu_fdt_add_subnode(ms->fdt, imsic_name); - qemu_fdt_setprop_string(ms->fdt, imsic_name, "compatible", "riscv,imsics"); + qemu_fdt_setprop_string_array(ms->fdt, imsic_name, "compatible", + (char **)&imsic_compat, + ARRAY_SIZE(imsic_compat)); + qemu_fdt_setprop_cell(ms->fdt, imsic_name, "#interrupt-cells", FDT_IMSIC_INT_CELLS); qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller", NULL, 0); qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller", NULL, 0); + qemu_fdt_setprop_cell(ms->fdt, imsic_name, "#msi-cells", 0); qemu_fdt_setprop(ms->fdt, imsic_name, "interrupts-extended", imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2); qemu_fdt_setprop(ms->fdt, imsic_name, "reg", imsic_regs, @@ -588,6 +596,12 @@ static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap, } +/* Caller must free string after use */ +static char *fdt_get_aplic_nodename(unsigned long aplic_addr) +{ + return g_strdup_printf("/soc/interrupt-controller@%lx", aplic_addr); +} + static void create_fdt_one_aplic(RISCVVirtState *s, int socket, unsigned long aplic_addr, uint32_t aplic_size, uint32_t msi_phandle, @@ -597,18 +611,24 @@ static void create_fdt_one_aplic(RISCVVirtState *s, int socket, bool m_mode, int num_harts) { int cpu; - g_autofree char *aplic_name = NULL; + g_autofree char *aplic_name = fdt_get_aplic_nodename(aplic_addr); g_autofree uint32_t *aplic_cells = g_new0(uint32_t, num_harts * 2); MachineState *ms = MACHINE(s); + static const char * const aplic_compat[2] = { + "qemu,aplic", "riscv,aplic" + }; for (cpu = 0; cpu < num_harts; cpu++) { aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); aplic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT : IRQ_S_EXT); } - aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr); qemu_fdt_add_subnode(ms->fdt, aplic_name); - qemu_fdt_setprop_string(ms->fdt, aplic_name, "compatible", "riscv,aplic"); + qemu_fdt_setprop_string_array(ms->fdt, aplic_name, "compatible", + (char **)&aplic_compat, + ARRAY_SIZE(aplic_compat)); + qemu_fdt_setprop_cell(ms->fdt, aplic_name, "#address-cells", + FDT_APLIC_ADDR_CELLS); qemu_fdt_setprop_cell(ms->fdt, aplic_name, "#interrupt-cells", FDT_APLIC_INT_CELLS); qemu_fdt_setprop(ms->fdt, aplic_name, "interrupt-controller", NULL, 0); @@ -628,7 +648,7 @@ static void create_fdt_one_aplic(RISCVVirtState *s, int socket, if (aplic_child_phandle) { qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,children", aplic_child_phandle); - qemu_fdt_setprop_cells(ms->fdt, aplic_name, "riscv,delegate", + qemu_fdt_setprop_cells(ms->fdt, aplic_name, "riscv,delegation", aplic_child_phandle, 0x1, VIRT_IRQCHIP_NUM_SOURCES); } @@ -646,7 +666,6 @@ static void create_fdt_socket_aplic(RISCVVirtState *s, uint32_t *aplic_phandles, int num_harts) { - g_autofree char *aplic_name = NULL; unsigned long aplic_addr; MachineState *ms = MACHINE(s); uint32_t aplic_m_phandle, aplic_s_phandle; @@ -672,9 +691,8 @@ static void create_fdt_socket_aplic(RISCVVirtState *s, aplic_s_phandle, 0, false, num_harts); - aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr); - if (!socket) { + g_autofree char *aplic_name = fdt_get_aplic_nodename(aplic_addr); platform_bus_add_all_fdt_nodes(ms->fdt, aplic_name, memmap[VIRT_PLATFORM_BUS].base, memmap[VIRT_PLATFORM_BUS].size, @@ -1746,6 +1764,8 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) mc->init = virt_machine_init; mc->max_cpus = VIRT_CPUS_MAX; mc->default_cpu_type = TYPE_RISCV_CPU_BASE; + mc->block_default_type = IF_VIRTIO; + mc->no_cdrom = 1; mc->pci_allow_0_address = true; mc->possible_cpu_arch_ids = riscv_numa_possible_cpu_arch_ids; mc->cpu_index_to_instance_props = riscv_numa_cpu_index_to_props; diff --git a/hw/s390x/ccw-device.c b/hw/s390x/ccw-device.c index fb8c1ac..a7d682e 100644 --- a/hw/s390x/ccw-device.c +++ b/hw/s390x/ccw-device.c @@ -31,9 +31,10 @@ static void ccw_device_refill_ids(CcwDevice *dev) dev->subch_id.valid = true; } -static void ccw_device_realize(CcwDevice *dev, Error **errp) +static bool ccw_device_realize(CcwDevice *dev, Error **errp) { ccw_device_refill_ids(dev); + return true; } static Property ccw_device_properties[] = { diff --git a/hw/s390x/ccw-device.h b/hw/s390x/ccw-device.h index 6dff952..5feeb0e 100644 --- a/hw/s390x/ccw-device.h +++ b/hw/s390x/ccw-device.h @@ -36,7 +36,7 @@ extern const VMStateDescription vmstate_ccw_dev; struct CCWDeviceClass { DeviceClass parent_class; void (*unplug)(HotplugHandler *, DeviceState *, Error **); - void (*realize)(CcwDevice *, Error **); + bool (*realize)(CcwDevice *, Error **); void (*refill_ids)(CcwDevice *); }; diff --git a/hw/s390x/s390-ccw.c b/hw/s390x/s390-ccw.c index 5261e66..3c09750 100644 --- a/hw/s390x/s390-ccw.c +++ b/hw/s390x/s390-ccw.c @@ -71,7 +71,7 @@ IOInstEnding s390_ccw_store(SubchDev *sch) return ret; } -static void s390_ccw_get_dev_info(S390CCWDevice *cdev, +static bool s390_ccw_get_dev_info(S390CCWDevice *cdev, char *sysfsdev, Error **errp) { @@ -84,12 +84,12 @@ static void s390_ccw_get_dev_info(S390CCWDevice *cdev, error_setg(errp, "No host device provided"); error_append_hint(errp, "Use -device vfio-ccw,sysfsdev=PATH_TO_DEVICE\n"); - return; + return false; } if (!realpath(sysfsdev, dev_path)) { error_setg_errno(errp, errno, "Host device '%s' not found", sysfsdev); - return; + return false; } cdev->mdevid = g_path_get_basename(dev_path); @@ -98,30 +98,29 @@ static void s390_ccw_get_dev_info(S390CCWDevice *cdev, tmp = g_path_get_basename(tmp_dir); if (sscanf(tmp, "%2x.%1x.%4x", &cssid, &ssid, &devid) != 3) { error_setg_errno(errp, errno, "Failed to read %s", tmp); - return; + return false; } cdev->hostid.cssid = cssid; cdev->hostid.ssid = ssid; cdev->hostid.devid = devid; cdev->hostid.valid = true; + return true; } -static void s390_ccw_realize(S390CCWDevice *cdev, char *sysfsdev, Error **errp) +static bool s390_ccw_realize(S390CCWDevice *cdev, char *sysfsdev, Error **errp) { CcwDevice *ccw_dev = CCW_DEVICE(cdev); CCWDeviceClass *ck = CCW_DEVICE_GET_CLASS(ccw_dev); DeviceState *parent = DEVICE(ccw_dev); SubchDev *sch; int ret; - Error *err = NULL; - s390_ccw_get_dev_info(cdev, sysfsdev, &err); - if (err) { - goto out_err_propagate; + if (!s390_ccw_get_dev_info(cdev, sysfsdev, errp)) { + return false; } - sch = css_create_sch(ccw_dev->devno, &err); + sch = css_create_sch(ccw_dev->devno, errp); if (!sch) { goto out_mdevid_free; } @@ -132,19 +131,18 @@ static void s390_ccw_realize(S390CCWDevice *cdev, char *sysfsdev, Error **errp) ccw_dev->sch = sch; ret = css_sch_build_schib(sch, &cdev->hostid); if (ret) { - error_setg_errno(&err, -ret, "%s: Failed to build initial schib", + error_setg_errno(errp, -ret, "%s: Failed to build initial schib", __func__); goto out_err; } - ck->realize(ccw_dev, &err); - if (err) { + if (!ck->realize(ccw_dev, errp)) { goto out_err; } css_generate_sch_crws(sch->cssid, sch->ssid, sch->schid, parent->hotplugged, 1); - return; + return true; out_err: css_subch_assign(sch->cssid, sch->ssid, sch->schid, sch->devno, NULL); @@ -152,8 +150,7 @@ out_err: g_free(sch); out_mdevid_free: g_free(cdev->mdevid); -out_err_propagate: - error_propagate(errp, err); + return false; } static void s390_ccw_unrealize(S390CCWDevice *cdev) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index 3d0bc3e..cd063f8 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -47,6 +47,7 @@ #include "migration/blocker.h" #include "qapi/visitor.h" #include "hw/s390x/cpu-topology.h" +#include CONFIG_DEVICES static Error *pv_mig_blocker; @@ -1126,6 +1127,8 @@ static void ccw_machine_2_12_class_options(MachineClass *mc) } DEFINE_CCW_MACHINE(2_12, "2.12", false); +#ifdef CONFIG_S390X_LEGACY_CPUS + static void ccw_machine_2_11_instance_options(MachineState *machine) { static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V2_11 }; @@ -1272,6 +1275,8 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) } DEFINE_CCW_MACHINE(2_4, "2.4", false); +#endif + static void ccw_machine_register_types(void) { type_register_static(&ccw_machine_info); diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c index c12531a..0c4354e 100644 --- a/hw/vfio/ap.c +++ b/hw/vfio/ap.c @@ -172,7 +172,7 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp) * Report this error, but do not make it a failing condition. * Lack of this IRQ in the host does not prevent normal operation. */ - error_report_err(err); + warn_report_err(err); } return; diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c index 2600e62..1f8e127 100644 --- a/hw/vfio/ccw.c +++ b/hw/vfio/ccw.c @@ -582,14 +582,13 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp) /* Call the class init function for subchannel. */ if (cdc->realize) { - cdc->realize(cdev, vcdev->vdev.sysfsdev, &err); - if (err) { - goto out_err_propagate; + if (!cdc->realize(cdev, vcdev->vdev.sysfsdev, errp)) { + return; } } if (!vfio_device_get_name(vbasedev, errp)) { - return; + goto out_unrealize; } if (!vfio_attach_device(cdev->mdevid, vbasedev, @@ -597,17 +596,17 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp) goto out_attach_dev_err; } - if (!vfio_ccw_get_region(vcdev, &err)) { + if (!vfio_ccw_get_region(vcdev, errp)) { goto out_region_err; } - if (!vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX, &err)) { + if (!vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX, errp)) { goto out_io_notifier_err; } if (vcdev->crw_region) { if (!vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_CRW_IRQ_INDEX, - &err)) { + errp)) { goto out_irq_notifier_err; } } @@ -617,7 +616,7 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp) * Report this error, but do not make it a failing condition. * Lack of this IRQ in the host does not prevent normal operation. */ - error_report_err(err); + warn_report_err(err); } return; @@ -632,11 +631,10 @@ out_region_err: vfio_detach_device(vbasedev); out_attach_dev_err: g_free(vbasedev->name); +out_unrealize: if (cdc->unrealize) { cdc->unrealize(cdev); } -out_err_propagate: - error_propagate(errp, err); } static void vfio_ccw_unrealize(DeviceState *dev) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index f9619a1..7cdb969 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -630,16 +630,6 @@ static void vfio_listener_region_add(MemoryListener *listener, goto fail; } - if (bcontainer->iova_ranges) { - ret = memory_region_iommu_set_iova_ranges(giommu->iommu_mr, - bcontainer->iova_ranges, - &err); - if (ret) { - g_free(giommu); - goto fail; - } - } - ret = memory_region_register_iommu_notifier(section->mr, &giommu->n, &err); if (ret) { @@ -849,20 +839,11 @@ static bool vfio_section_is_vfio_pci(MemoryRegionSection *section, return false; } -static void vfio_dirty_tracking_update(MemoryListener *listener, - MemoryRegionSection *section) +static void vfio_dirty_tracking_update_range(VFIODirtyRanges *range, + hwaddr iova, hwaddr end, + bool update_pci) { - VFIODirtyRangesListener *dirty = container_of(listener, - VFIODirtyRangesListener, - listener); - VFIODirtyRanges *range = &dirty->ranges; - hwaddr iova, end, *min, *max; - - if (!vfio_listener_valid_section(section, "tracking_update") || - !vfio_get_section_iova_range(dirty->bcontainer, section, - &iova, &end, NULL)) { - return; - } + hwaddr *min, *max; /* * The address space passed to the dirty tracker is reduced to three ranges: @@ -883,8 +864,7 @@ static void vfio_dirty_tracking_update(MemoryListener *listener, * The alternative would be an IOVATree but that has a much bigger runtime * overhead and unnecessary complexity. */ - if (vfio_section_is_vfio_pci(section, dirty->bcontainer) && - iova >= UINT32_MAX) { + if (update_pci && iova >= UINT32_MAX) { min = &range->minpci64; max = &range->maxpci64; } else { @@ -899,7 +879,23 @@ static void vfio_dirty_tracking_update(MemoryListener *listener, } trace_vfio_device_dirty_tracking_update(iova, end, *min, *max); - return; +} + +static void vfio_dirty_tracking_update(MemoryListener *listener, + MemoryRegionSection *section) +{ + VFIODirtyRangesListener *dirty = + container_of(listener, VFIODirtyRangesListener, listener); + hwaddr iova, end; + + if (!vfio_listener_valid_section(section, "tracking_update") || + !vfio_get_section_iova_range(dirty->bcontainer, section, + &iova, &end, NULL)) { + return; + } + + vfio_dirty_tracking_update_range(&dirty->ranges, iova, end, + vfio_section_is_vfio_pci(section, dirty->bcontainer)); } static const MemoryListener vfio_dirty_tracking_listener = { @@ -1030,7 +1026,7 @@ static void vfio_device_feature_dma_logging_start_destroy( g_free(feature); } -static int vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer, +static bool vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer, Error **errp) { struct vfio_device_feature *feature; @@ -1043,7 +1039,7 @@ static int vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer, &ranges); if (!feature) { error_setg_errno(errp, errno, "Failed to prepare DMA logging"); - return -errno; + return false; } QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { @@ -1068,7 +1064,7 @@ out: vfio_device_feature_dma_logging_start_destroy(feature); - return ret; + return ret == 0; } static bool vfio_listener_log_global_start(MemoryListener *listener, @@ -1077,18 +1073,18 @@ static bool vfio_listener_log_global_start(MemoryListener *listener, ERRP_GUARD(); VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, listener); - int ret; + bool ret; if (vfio_devices_all_device_dirty_tracking(bcontainer)) { ret = vfio_devices_dma_logging_start(bcontainer, errp); } else { - ret = vfio_container_set_dirty_page_tracking(bcontainer, true, errp); + ret = vfio_container_set_dirty_page_tracking(bcontainer, true, errp) == 0; } - if (ret) { + if (!ret) { error_prepend(errp, "vfio: Could not start dirty page tracking - "); } - return !ret; + return ret; } static void vfio_listener_log_global_stop(MemoryListener *listener) @@ -1306,37 +1302,50 @@ vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer, &vrdl); } +static int vfio_sync_iommu_dirty_bitmap(VFIOContainerBase *bcontainer, + MemoryRegionSection *section) +{ + VFIOGuestIOMMU *giommu; + bool found = false; + Int128 llend; + vfio_giommu_dirty_notifier gdn; + int idx; + + QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) { + if (MEMORY_REGION(giommu->iommu_mr) == section->mr && + giommu->n.start == section->offset_within_region) { + found = true; + break; + } + } + + if (!found) { + return 0; + } + + gdn.giommu = giommu; + idx = memory_region_iommu_attrs_to_index(giommu->iommu_mr, + MEMTXATTRS_UNSPECIFIED); + + llend = int128_add(int128_make64(section->offset_within_region), + section->size); + llend = int128_sub(llend, int128_one()); + + iommu_notifier_init(&gdn.n, vfio_iommu_map_dirty_notify, IOMMU_NOTIFIER_MAP, + section->offset_within_region, int128_get64(llend), + idx); + memory_region_iommu_replay(giommu->iommu_mr, &gdn.n); + + return 0; +} + static int vfio_sync_dirty_bitmap(VFIOContainerBase *bcontainer, MemoryRegionSection *section, Error **errp) { ram_addr_t ram_addr; if (memory_region_is_iommu(section->mr)) { - VFIOGuestIOMMU *giommu; - - QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) { - if (MEMORY_REGION(giommu->iommu_mr) == section->mr && - giommu->n.start == section->offset_within_region) { - Int128 llend; - vfio_giommu_dirty_notifier gdn = { .giommu = giommu }; - int idx = memory_region_iommu_attrs_to_index(giommu->iommu_mr, - MEMTXATTRS_UNSPECIFIED); - - llend = int128_add(int128_make64(section->offset_within_region), - section->size); - llend = int128_sub(llend, int128_one()); - - iommu_notifier_init(&gdn.n, - vfio_iommu_map_dirty_notify, - IOMMU_NOTIFIER_MAP, - section->offset_within_region, - int128_get64(llend), - idx); - memory_region_iommu_replay(giommu->iommu_mr, &gdn.n); - break; - } - } - return 0; + return vfio_sync_iommu_dirty_bitmap(bcontainer, section); } else if (memory_region_has_ram_discard_manager(section->mr)) { int ret; @@ -1499,6 +1508,13 @@ void vfio_put_address_space(VFIOAddressSpace *space) } } +void vfio_address_space_insert(VFIOAddressSpace *space, + VFIOContainerBase *bcontainer) +{ + QLIST_INSERT_HEAD(&space->containers, bcontainer, next); + bcontainer->space = space; +} + struct vfio_device_info *vfio_get_device_info(int fd) { struct vfio_device_info *info; @@ -1528,6 +1544,7 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev, { const VFIOIOMMUClass *ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY)); + HostIOMMUDevice *hiod; if (vbasedev->iommufd) { ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); @@ -1535,7 +1552,19 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev, assert(ops); - return ops->attach_device(name, vbasedev, as, errp); + if (!ops->attach_device(name, vbasedev, as, errp)) { + return false; + } + + hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename)); + if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp)) { + object_unref(hiod); + ops->detach_device(vbasedev); + return false; + } + vbasedev->hiod = hiod; + + return true; } void vfio_detach_device(VFIODevice *vbasedev) @@ -1543,5 +1572,6 @@ void vfio_detach_device(VFIODevice *vbasedev) if (!vbasedev->bcontainer) { return; } - vbasedev->bcontainer->ops->detach_device(vbasedev); + object_unref(vbasedev->hiod); + VFIO_IOMMU_GET_CLASS(vbasedev->bcontainer)->detach_device(vbasedev); } diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c index 760d9d0..50b1664 100644 --- a/hw/vfio/container-base.c +++ b/hw/vfio/container-base.c @@ -19,73 +19,73 @@ int vfio_container_dma_map(VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, void *vaddr, bool readonly) { - g_assert(bcontainer->ops->dma_map); - return bcontainer->ops->dma_map(bcontainer, iova, size, vaddr, readonly); + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + + g_assert(vioc->dma_map); + return vioc->dma_map(bcontainer, iova, size, vaddr, readonly); } int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, IOMMUTLBEntry *iotlb) { - g_assert(bcontainer->ops->dma_unmap); - return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb); + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + + g_assert(vioc->dma_unmap); + return vioc->dma_unmap(bcontainer, iova, size, iotlb); } bool vfio_container_add_section_window(VFIOContainerBase *bcontainer, MemoryRegionSection *section, Error **errp) { - if (!bcontainer->ops->add_window) { + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + + if (!vioc->add_window) { return true; } - return bcontainer->ops->add_window(bcontainer, section, errp); + return vioc->add_window(bcontainer, section, errp); } void vfio_container_del_section_window(VFIOContainerBase *bcontainer, MemoryRegionSection *section) { - if (!bcontainer->ops->del_window) { + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + + if (!vioc->del_window) { return; } - return bcontainer->ops->del_window(bcontainer, section); + return vioc->del_window(bcontainer, section); } int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, bool start, Error **errp) { + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + if (!bcontainer->dirty_pages_supported) { return 0; } - g_assert(bcontainer->ops->set_dirty_page_tracking); - return bcontainer->ops->set_dirty_page_tracking(bcontainer, start, errp); + g_assert(vioc->set_dirty_page_tracking); + return vioc->set_dirty_page_tracking(bcontainer, start, errp); } int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp) { - g_assert(bcontainer->ops->query_dirty_bitmap); - return bcontainer->ops->query_dirty_bitmap(bcontainer, vbmap, iova, size, - errp); -} + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); -void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, - const VFIOIOMMUClass *ops) -{ - bcontainer->ops = ops; - bcontainer->space = space; - bcontainer->error = NULL; - bcontainer->dirty_pages_supported = false; - bcontainer->dma_max_mappings = 0; - bcontainer->iova_ranges = NULL; - QLIST_INIT(&bcontainer->giommu_list); - QLIST_INIT(&bcontainer->vrdl_list); + g_assert(vioc->query_dirty_bitmap); + return vioc->query_dirty_bitmap(bcontainer, vbmap, iova, size, + errp); } -void vfio_container_destroy(VFIOContainerBase *bcontainer) +static void vfio_container_instance_finalize(Object *obj) { + VFIOContainerBase *bcontainer = VFIO_IOMMU(obj); VFIOGuestIOMMU *giommu, *tmp; QLIST_REMOVE(bcontainer, next); @@ -100,11 +100,27 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer) g_list_free_full(bcontainer->iova_ranges, g_free); } +static void vfio_container_instance_init(Object *obj) +{ + VFIOContainerBase *bcontainer = VFIO_IOMMU(obj); + + bcontainer->error = NULL; + bcontainer->dirty_pages_supported = false; + bcontainer->dma_max_mappings = 0; + bcontainer->iova_ranges = NULL; + QLIST_INIT(&bcontainer->giommu_list); + QLIST_INIT(&bcontainer->vrdl_list); +} + static const TypeInfo types[] = { { .name = TYPE_VFIO_IOMMU, - .parent = TYPE_INTERFACE, + .parent = TYPE_OBJECT, + .instance_init = vfio_container_instance_init, + .instance_finalize = vfio_container_instance_finalize, + .instance_size = sizeof(VFIOContainerBase), .class_size = sizeof(VFIOIOMMUClass), + .abstract = true, }, }; diff --git a/hw/vfio/container.c b/hw/vfio/container.c index 096cc97..2e7ecdf 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -354,7 +354,7 @@ static void vfio_kvm_device_del_group(VFIOGroup *group) /* * vfio_get_iommu_type - selects the richest iommu_type (v2 first) */ -static int vfio_get_iommu_type(VFIOContainer *container, +static int vfio_get_iommu_type(int container_fd, Error **errp) { int iommu_types[] = { VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU, @@ -362,7 +362,7 @@ static int vfio_get_iommu_type(VFIOContainer *container, int i; for (i = 0; i < ARRAY_SIZE(iommu_types); i++) { - if (ioctl(container->fd, VFIO_CHECK_EXTENSION, iommu_types[i])) { + if (ioctl(container_fd, VFIO_CHECK_EXTENSION, iommu_types[i])) { return iommu_types[i]; } } @@ -373,67 +373,70 @@ static int vfio_get_iommu_type(VFIOContainer *container, /* * vfio_get_iommu_ops - get a VFIOIOMMUClass associated with a type */ -static const VFIOIOMMUClass *vfio_get_iommu_class(int iommu_type, Error **errp) +static const char *vfio_get_iommu_class_name(int iommu_type) { - ObjectClass *klass = NULL; - switch (iommu_type) { case VFIO_TYPE1v2_IOMMU: case VFIO_TYPE1_IOMMU: - klass = object_class_by_name(TYPE_VFIO_IOMMU_LEGACY); + return TYPE_VFIO_IOMMU_LEGACY; break; case VFIO_SPAPR_TCE_v2_IOMMU: case VFIO_SPAPR_TCE_IOMMU: - klass = object_class_by_name(TYPE_VFIO_IOMMU_SPAPR); + return TYPE_VFIO_IOMMU_SPAPR; break; default: g_assert_not_reached(); }; - - return VFIO_IOMMU_CLASS(klass); } -static bool vfio_set_iommu(VFIOContainer *container, int group_fd, - VFIOAddressSpace *space, Error **errp) +static bool vfio_set_iommu(int container_fd, int group_fd, + int *iommu_type, Error **errp) { - int iommu_type; - const VFIOIOMMUClass *vioc; - - iommu_type = vfio_get_iommu_type(container, errp); - if (iommu_type < 0) { - return false; - } - - if (ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { + if (ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &container_fd)) { error_setg_errno(errp, errno, "Failed to set group container"); return false; } - while (ioctl(container->fd, VFIO_SET_IOMMU, iommu_type)) { - if (iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { + while (ioctl(container_fd, VFIO_SET_IOMMU, *iommu_type)) { + if (*iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { /* * On sPAPR, despite the IOMMU subdriver always advertises v1 and * v2, the running platform may not support v2 and there is no * way to guess it until an IOMMU group gets added to the container. * So in case it fails with v2, try v1 as a fallback. */ - iommu_type = VFIO_SPAPR_TCE_IOMMU; + *iommu_type = VFIO_SPAPR_TCE_IOMMU; continue; } error_setg_errno(errp, errno, "Failed to set iommu for container"); return false; } - container->iommu_type = iommu_type; + return true; +} - vioc = vfio_get_iommu_class(iommu_type, errp); - if (!vioc) { - error_setg(errp, "No available IOMMU models"); - return false; +static VFIOContainer *vfio_create_container(int fd, VFIOGroup *group, + Error **errp) +{ + int iommu_type; + const char *vioc_name; + VFIOContainer *container; + + iommu_type = vfio_get_iommu_type(fd, errp); + if (iommu_type < 0) { + return NULL; } - vfio_container_init(&container->bcontainer, space, vioc); - return true; + if (!vfio_set_iommu(fd, group->fd, &iommu_type, errp)) { + return NULL; + } + + vioc_name = vfio_get_iommu_class_name(iommu_type); + + container = VFIO_IOMMU_LEGACY(object_new(vioc_name)); + container->fd = fd; + container->iommu_type = iommu_type; + return container; } static int vfio_get_iommu_info(VFIOContainer *container, @@ -542,6 +545,7 @@ static bool vfio_connect_container(VFIOGroup *group, AddressSpace *as, VFIOContainerBase *bcontainer; int ret, fd; VFIOAddressSpace *space; + VFIOIOMMUClass *vioc; space = vfio_get_address_space(as); @@ -610,13 +614,11 @@ static bool vfio_connect_container(VFIOGroup *group, AddressSpace *as, goto close_fd_exit; } - container = g_malloc0(sizeof(*container)); - container->fd = fd; - bcontainer = &container->bcontainer; - - if (!vfio_set_iommu(container, group->fd, space, errp)) { - goto free_container_exit; + container = vfio_create_container(fd, group, errp); + if (!container) { + goto close_fd_exit; } + bcontainer = &container->bcontainer; if (!vfio_cpr_register_container(bcontainer, errp)) { goto free_container_exit; @@ -628,16 +630,16 @@ static bool vfio_connect_container(VFIOGroup *group, AddressSpace *as, goto unregister_container_exit; } - assert(bcontainer->ops->setup); + vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + assert(vioc->setup); - if (!bcontainer->ops->setup(bcontainer, errp)) { + if (!vioc->setup(bcontainer, errp)) { goto enable_discards_exit; } vfio_kvm_device_add_group(group); - QLIST_INIT(&container->group_list); - QLIST_INSERT_HEAD(&space->containers, bcontainer, next); + vfio_address_space_insert(space, bcontainer); group->container = container; QLIST_INSERT_HEAD(&container->group_list, group, container_next); @@ -659,8 +661,8 @@ listener_release_exit: QLIST_REMOVE(bcontainer, next); vfio_kvm_device_del_group(group); memory_listener_unregister(&bcontainer->listener); - if (bcontainer->ops->release) { - bcontainer->ops->release(bcontainer); + if (vioc->release) { + vioc->release(bcontainer); } enable_discards_exit: @@ -670,7 +672,7 @@ unregister_container_exit: vfio_cpr_unregister_container(bcontainer); free_container_exit: - g_free(container); + object_unref(container); close_fd_exit: close(fd); @@ -685,6 +687,7 @@ static void vfio_disconnect_container(VFIOGroup *group) { VFIOContainer *container = group->container; VFIOContainerBase *bcontainer = &container->bcontainer; + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); QLIST_REMOVE(group, container_next); group->container = NULL; @@ -696,8 +699,8 @@ static void vfio_disconnect_container(VFIOGroup *group) */ if (QLIST_EMPTY(&container->group_list)) { memory_listener_unregister(&bcontainer->listener); - if (bcontainer->ops->release) { - bcontainer->ops->release(bcontainer); + if (vioc->release) { + vioc->release(bcontainer); } } @@ -709,12 +712,10 @@ static void vfio_disconnect_container(VFIOGroup *group) if (QLIST_EMPTY(&container->group_list)) { VFIOAddressSpace *space = bcontainer->space; - vfio_container_destroy(bcontainer); - trace_vfio_disconnect_container(container->fd); vfio_cpr_unregister_container(bcontainer); close(container->fd); - g_free(container); + object_unref(container); vfio_put_address_space(space); } @@ -1126,6 +1127,8 @@ static void vfio_iommu_legacy_class_init(ObjectClass *klass, void *data) { VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); + vioc->hiod_typename = TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO; + vioc->setup = vfio_legacy_setup; vioc->dma_map = vfio_legacy_dma_map; vioc->dma_unmap = vfio_legacy_dma_unmap; @@ -1136,12 +1139,75 @@ static void vfio_iommu_legacy_class_init(ObjectClass *klass, void *data) vioc->pci_hot_reset = vfio_legacy_pci_hot_reset; }; +static bool hiod_legacy_vfio_realize(HostIOMMUDevice *hiod, void *opaque, + Error **errp) +{ + VFIODevice *vdev = opaque; + + hiod->name = g_strdup(vdev->name); + hiod->caps.aw_bits = vfio_device_get_aw_bits(vdev); + hiod->agent = opaque; + + return true; +} + +static int hiod_legacy_vfio_get_cap(HostIOMMUDevice *hiod, int cap, + Error **errp) +{ + HostIOMMUDeviceCaps *caps = &hiod->caps; + + switch (cap) { + case HOST_IOMMU_DEVICE_CAP_AW_BITS: + return caps->aw_bits; + default: + error_setg(errp, "%s: unsupported capability %x", hiod->name, cap); + return -EINVAL; + } +} + +static GList * +hiod_legacy_vfio_get_iova_ranges(HostIOMMUDevice *hiod, Error **errp) +{ + VFIODevice *vdev = hiod->agent; + GList *l = NULL; + + g_assert(vdev); + + if (vdev->bcontainer) { + l = g_list_copy(vdev->bcontainer->iova_ranges); + } + + return l; +} + +static void vfio_iommu_legacy_instance_init(Object *obj) +{ + VFIOContainer *container = VFIO_IOMMU_LEGACY(obj); + + QLIST_INIT(&container->group_list); +} + +static void hiod_legacy_vfio_class_init(ObjectClass *oc, void *data) +{ + HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc); + + hioc->realize = hiod_legacy_vfio_realize; + hioc->get_cap = hiod_legacy_vfio_get_cap; + hioc->get_iova_ranges = hiod_legacy_vfio_get_iova_ranges; +}; + static const TypeInfo types[] = { { .name = TYPE_VFIO_IOMMU_LEGACY, .parent = TYPE_VFIO_IOMMU, + .instance_init = vfio_iommu_legacy_instance_init, + .instance_size = sizeof(VFIOContainer), .class_init = vfio_iommu_legacy_class_init, - }, + }, { + .name = TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO, + .parent = TYPE_HOST_IOMMU_DEVICE, + .class_init = hiod_legacy_vfio_class_init, + } }; DEFINE_TYPES(types) diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c index 27ea26a..b14edd4 100644 --- a/hw/vfio/helpers.c +++ b/hw/vfio/helpers.c @@ -658,3 +658,20 @@ void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops, vbasedev->ram_block_discard_allowed = ram_discard; } + +int vfio_device_get_aw_bits(VFIODevice *vdev) +{ + /* + * iova_ranges is a sorted list. For old kernels that support + * VFIO but not support query of iova ranges, iova_ranges is NULL, + * in this case HOST_IOMMU_DEVICE_CAP_AW_BITS_MAX(64) is returned. + */ + GList *l = g_list_last(vdev->bcontainer->iova_ranges); + + if (l) { + Range *range = l->data; + return range_get_last_bit(range) + 1; + } + + return HOST_IOMMU_DEVICE_CAP_AW_BITS_MAX; +} diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index 554f9a6..c2f158e 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -237,9 +237,8 @@ static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container) return; } memory_listener_unregister(&bcontainer->listener); - vfio_container_destroy(bcontainer); iommufd_backend_free_id(container->be, container->ioas_id); - g_free(container); + object_unref(container); } static int iommufd_cdev_ram_block_discard_disable(bool state) @@ -324,7 +323,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, /* try to attach to an existing container in this space */ QLIST_FOREACH(bcontainer, &space->containers, next) { container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); - if (bcontainer->ops != iommufd_vioc || + if (VFIO_IOMMU_GET_CLASS(bcontainer) != iommufd_vioc || vbasedev->iommufd != container->be) { continue; } @@ -352,13 +351,12 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, trace_iommufd_cdev_alloc_ioas(vbasedev->iommufd->fd, ioas_id); - container = g_malloc0(sizeof(*container)); + container = VFIO_IOMMU_IOMMUFD(object_new(TYPE_VFIO_IOMMU_IOMMUFD)); container->be = vbasedev->iommufd; container->ioas_id = ioas_id; bcontainer = &container->bcontainer; - vfio_container_init(bcontainer, space, iommufd_vioc); - QLIST_INSERT_HEAD(&space->containers, bcontainer, next); + vfio_address_space_insert(space, bcontainer); if (!iommufd_cdev_attach_container(vbasedev, container, errp)) { goto err_attach_container; @@ -465,7 +463,7 @@ static VFIODevice *iommufd_cdev_pci_find_by_devid(__u32 devid) VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); QLIST_FOREACH(vbasedev_iter, &vfio_device_list, global_next) { - if (vbasedev_iter->bcontainer->ops != iommufd_vioc) { + if (VFIO_IOMMU_GET_CLASS(vbasedev_iter->bcontainer) != iommufd_vioc) { continue; } if (devid == vbasedev_iter->devid) { @@ -612,6 +610,8 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data) { VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); + vioc->hiod_typename = TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO; + vioc->dma_map = iommufd_cdev_map; vioc->dma_unmap = iommufd_cdev_unmap; vioc->attach_device = iommufd_cdev_attach; @@ -619,12 +619,64 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data) vioc->pci_hot_reset = iommufd_cdev_pci_hot_reset; }; +static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque, + Error **errp) +{ + VFIODevice *vdev = opaque; + HostIOMMUDeviceCaps *caps = &hiod->caps; + enum iommu_hw_info_type type; + union { + struct iommu_hw_info_vtd vtd; + } data; + + hiod->agent = opaque; + + if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid, + &type, &data, sizeof(data), errp)) { + return false; + } + + hiod->name = g_strdup(vdev->name); + caps->type = type; + caps->aw_bits = vfio_device_get_aw_bits(vdev); + + return true; +} + +static GList * +hiod_iommufd_vfio_get_iova_ranges(HostIOMMUDevice *hiod, Error **errp) +{ + VFIODevice *vdev = hiod->agent; + GList *l = NULL; + + g_assert(vdev); + + if (vdev->bcontainer) { + l = g_list_copy(vdev->bcontainer->iova_ranges); + } + + return l; +} + +static void hiod_iommufd_vfio_class_init(ObjectClass *oc, void *data) +{ + HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_CLASS(oc); + + hiodc->realize = hiod_iommufd_vfio_realize; + hiodc->get_iova_ranges = hiod_iommufd_vfio_get_iova_ranges; +}; + static const TypeInfo types[] = { { .name = TYPE_VFIO_IOMMU_IOMMUFD, .parent = TYPE_VFIO_IOMMU, + .instance_size = sizeof(VFIOIOMMUFDContainer), .class_init = vfio_iommu_iommufd_class_init, - }, + }, { + .name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO, + .parent = TYPE_HOST_IOMMU_DEVICE_IOMMUFD, + .class_init = hiod_iommufd_vfio_class_init, + } }; DEFINE_TYPES(types) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 74a79bd..e03d9f3 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -2511,9 +2511,9 @@ int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) { VFIODevice *vbasedev = &vdev->vbasedev; - const VFIOIOMMUClass *ops = vbasedev->bcontainer->ops; + const VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(vbasedev->bcontainer); - return ops->pci_hot_reset(vbasedev, single); + return vioc->pci_hot_reset(vbasedev, single); } /* @@ -3121,10 +3121,15 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) vfio_bars_register(vdev); - if (!vfio_add_capabilities(vdev, errp)) { + if (!pci_device_set_iommu_device(pdev, vbasedev->hiod, errp)) { + error_prepend(errp, "Failed to set iommu_device: "); goto out_teardown; } + if (!vfio_add_capabilities(vdev, errp)) { + goto out_unset_idev; + } + if (vdev->vga) { vfio_vga_quirk_setup(vdev); } @@ -3141,7 +3146,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) error_setg(errp, "cannot support IGD OpRegion feature on hotplugged " "device"); - goto out_teardown; + goto out_unset_idev; } ret = vfio_get_dev_region_info(vbasedev, @@ -3150,11 +3155,11 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) if (ret) { error_setg_errno(errp, -ret, "does not support requested IGD OpRegion feature"); - goto out_teardown; + goto out_unset_idev; } if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) { - goto out_teardown; + goto out_unset_idev; } } @@ -3238,6 +3243,8 @@ out_deregister: if (vdev->intx.mmap_timer) { timer_free(vdev->intx.mmap_timer); } +out_unset_idev: + pci_device_unset_iommu_device(pdev); out_teardown: vfio_teardown_msi(vdev); vfio_bars_exit(vdev); @@ -3266,6 +3273,7 @@ static void vfio_instance_finalize(Object *obj) static void vfio_exitfn(PCIDevice *pdev) { VFIOPCIDevice *vdev = VFIO_PCI(pdev); + VFIODevice *vbasedev = &vdev->vbasedev; vfio_unregister_req_notifier(vdev); vfio_unregister_err_notifier(vdev); @@ -3280,7 +3288,8 @@ static void vfio_exitfn(PCIDevice *pdev) vfio_teardown_msi(vdev); vfio_pci_disable_rp_atomics(vdev); vfio_bars_exit(vdev); - vfio_migration_exit(&vdev->vbasedev); + vfio_migration_exit(vbasedev); + pci_device_unset_iommu_device(pdev); } static void vfio_pci_reset(DeviceState *dev) diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c index 47b040f..018bd20 100644 --- a/hw/vfio/spapr.c +++ b/hw/vfio/spapr.c @@ -30,6 +30,8 @@ typedef struct VFIOSpaprContainer { QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; } VFIOSpaprContainer; +OBJECT_DECLARE_SIMPLE_TYPE(VFIOSpaprContainer, VFIO_IOMMU_SPAPR); + static bool vfio_prereg_listener_skipped_section(MemoryRegionSection *section) { if (memory_region_is_iommu(section->mr)) { @@ -548,6 +550,7 @@ static const TypeInfo types[] = { { .name = TYPE_VFIO_IOMMU_SPAPR, .parent = TYPE_VFIO_IOMMU_LEGACY, + .instance_size = sizeof(VFIOSpaprContainer), .class_init = vfio_iommu_spapr_class_init, }, }; diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index 1326c6e..b9a7ddc 100644 --- a/hw/virtio/virtio-iommu.c +++ b/hw/virtio/virtio-iommu.c @@ -69,6 +69,11 @@ typedef struct VirtIOIOMMUMapping { uint32_t flags; } VirtIOIOMMUMapping; +struct hiod_key { + PCIBus *bus; + uint8_t devfn; +}; + static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev) { return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn); @@ -462,8 +467,195 @@ static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque, return &sdev->as; } +static gboolean hiod_equal(gconstpointer v1, gconstpointer v2) +{ + const struct hiod_key *key1 = v1; + const struct hiod_key *key2 = v2; + + return (key1->bus == key2->bus) && (key1->devfn == key2->devfn); +} + +static guint hiod_hash(gconstpointer v) +{ + const struct hiod_key *key = v; + guint value = (guint)(uintptr_t)key->bus; + + return (guint)(value << 8 | key->devfn); +} + +static void hiod_destroy(gpointer v) +{ + object_unref(v); +} + +static HostIOMMUDevice * +get_host_iommu_device(VirtIOIOMMU *viommu, PCIBus *bus, int devfn) { + struct hiod_key key = { + .bus = bus, + .devfn = devfn, + }; + + return g_hash_table_lookup(viommu->host_iommu_devices, &key); +} + +/** + * rebuild_resv_regions: rebuild resv regions with both the + * info of host resv ranges and property set resv ranges + */ +static int rebuild_resv_regions(IOMMUDevice *sdev) +{ + GList *l; + int i = 0; + + /* free the existing list and rebuild it from scratch */ + g_list_free_full(sdev->resv_regions, g_free); + sdev->resv_regions = NULL; + + /* First add host reserved regions if any, all tagged as RESERVED */ + for (l = sdev->host_resv_ranges; l; l = l->next) { + ReservedRegion *reg = g_new0(ReservedRegion, 1); + Range *r = (Range *)l->data; + + reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED; + range_set_bounds(®->range, range_lob(r), range_upb(r)); + sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg); + trace_virtio_iommu_host_resv_regions(sdev->iommu_mr.parent_obj.name, i, + range_lob(®->range), + range_upb(®->range)); + i++; + } + /* + * then add higher priority reserved regions set by the machine + * through properties + */ + add_prop_resv_regions(sdev); + return 0; +} + +static int virtio_iommu_set_host_iova_ranges(VirtIOIOMMU *s, PCIBus *bus, + int devfn, GList *iova_ranges, + Error **errp) +{ + IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus); + IOMMUDevice *sdev; + GList *current_ranges; + GList *l, *tmp, *new_ranges = NULL; + int ret = -EINVAL; + + if (!sbus) { + error_report("%s no sbus", __func__); + } + + sdev = sbus->pbdev[devfn]; + + current_ranges = sdev->host_resv_ranges; + + g_assert(!sdev->probe_done); + + /* check that each new resv region is included in an existing one */ + if (sdev->host_resv_ranges) { + range_inverse_array(iova_ranges, + &new_ranges, + 0, UINT64_MAX); + + for (tmp = new_ranges; tmp; tmp = tmp->next) { + Range *newr = (Range *)tmp->data; + bool included = false; + + for (l = current_ranges; l; l = l->next) { + Range * r = (Range *)l->data; + + if (range_contains_range(r, newr)) { + included = true; + break; + } + } + if (!included) { + goto error; + } + } + /* all new reserved ranges are included in existing ones */ + ret = 0; + goto out; + } + + range_inverse_array(iova_ranges, + &sdev->host_resv_ranges, + 0, UINT64_MAX); + rebuild_resv_regions(sdev); + + return 0; +error: + error_setg(errp, "%s Conflicting host reserved ranges set!", + __func__); +out: + g_list_free_full(new_ranges, g_free); + return ret; +} + +static bool virtio_iommu_set_iommu_device(PCIBus *bus, void *opaque, int devfn, + HostIOMMUDevice *hiod, Error **errp) +{ + VirtIOIOMMU *viommu = opaque; + HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod); + struct hiod_key *new_key; + GList *host_iova_ranges = NULL; + + assert(hiod); + + if (get_host_iommu_device(viommu, bus, devfn)) { + error_setg(errp, "Host IOMMU device already exists"); + return false; + } + + if (hiodc->get_iova_ranges) { + int ret; + host_iova_ranges = hiodc->get_iova_ranges(hiod, errp); + if (!host_iova_ranges) { + return true; /* some old kernels may not support that capability */ + } + ret = virtio_iommu_set_host_iova_ranges(viommu, hiod->aliased_bus, + hiod->aliased_devfn, + host_iova_ranges, errp); + if (ret) { + g_list_free_full(host_iova_ranges, g_free); + return false; + } + } + + new_key = g_malloc(sizeof(*new_key)); + new_key->bus = bus; + new_key->devfn = devfn; + + object_ref(hiod); + g_hash_table_insert(viommu->host_iommu_devices, new_key, hiod); + g_list_free_full(host_iova_ranges, g_free); + + return true; +} + +static void +virtio_iommu_unset_iommu_device(PCIBus *bus, void *opaque, int devfn) +{ + VirtIOIOMMU *viommu = opaque; + HostIOMMUDevice *hiod; + struct hiod_key key = { + .bus = bus, + .devfn = devfn, + }; + + hiod = g_hash_table_lookup(viommu->host_iommu_devices, &key); + if (!hiod) { + return; + } + + g_hash_table_remove(viommu->host_iommu_devices, &key); +} + static const PCIIOMMUOps virtio_iommu_ops = { .get_address_space = virtio_iommu_find_add_as, + .set_iommu_device = virtio_iommu_set_iommu_device, + .unset_iommu_device = virtio_iommu_unset_iommu_device, }; static int virtio_iommu_attach(VirtIOIOMMU *s, @@ -1159,106 +1351,6 @@ static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, return 0; } -/** - * rebuild_resv_regions: rebuild resv regions with both the - * info of host resv ranges and property set resv ranges - */ -static int rebuild_resv_regions(IOMMUDevice *sdev) -{ - GList *l; - int i = 0; - - /* free the existing list and rebuild it from scratch */ - g_list_free_full(sdev->resv_regions, g_free); - sdev->resv_regions = NULL; - - /* First add host reserved regions if any, all tagged as RESERVED */ - for (l = sdev->host_resv_ranges; l; l = l->next) { - ReservedRegion *reg = g_new0(ReservedRegion, 1); - Range *r = (Range *)l->data; - - reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED; - range_set_bounds(®->range, range_lob(r), range_upb(r)); - sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg); - trace_virtio_iommu_host_resv_regions(sdev->iommu_mr.parent_obj.name, i, - range_lob(®->range), - range_upb(®->range)); - i++; - } - /* - * then add higher priority reserved regions set by the machine - * through properties - */ - add_prop_resv_regions(sdev); - return 0; -} - -/** - * virtio_iommu_set_iova_ranges: Conveys the usable IOVA ranges - * - * The function turns those into reserved ranges. Once some - * reserved ranges have been set, new reserved regions cannot be - * added outside of the original ones. - * - * @mr: IOMMU MR - * @iova_ranges: list of usable IOVA ranges - * @errp: error handle - */ -static int virtio_iommu_set_iova_ranges(IOMMUMemoryRegion *mr, - GList *iova_ranges, - Error **errp) -{ - IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr); - GList *current_ranges = sdev->host_resv_ranges; - GList *l, *tmp, *new_ranges = NULL; - int ret = -EINVAL; - - /* check that each new resv region is included in an existing one */ - if (sdev->host_resv_ranges) { - range_inverse_array(iova_ranges, - &new_ranges, - 0, UINT64_MAX); - - for (tmp = new_ranges; tmp; tmp = tmp->next) { - Range *newr = (Range *)tmp->data; - bool included = false; - - for (l = current_ranges; l; l = l->next) { - Range * r = (Range *)l->data; - - if (range_contains_range(r, newr)) { - included = true; - break; - } - } - if (!included) { - goto error; - } - } - /* all new reserved ranges are included in existing ones */ - ret = 0; - goto out; - } - - if (sdev->probe_done) { - warn_report("%s: Notified about new host reserved regions after probe", - mr->parent_obj.name); - } - - range_inverse_array(iova_ranges, - &sdev->host_resv_ranges, - 0, UINT64_MAX); - rebuild_resv_regions(sdev); - - return 0; -error: - error_setg(errp, "IOMMU mr=%s Conflicting host reserved ranges set!", - mr->parent_obj.name); -out: - g_list_free_full(new_ranges, g_free); - return ret; -} - static void virtio_iommu_system_reset(void *opaque) { VirtIOIOMMU *s = opaque; @@ -1357,6 +1449,9 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free); + s->host_iommu_devices = g_hash_table_new_full(hiod_hash, hiod_equal, + g_free, hiod_destroy); + if (s->primary_bus) { pci_setup_iommu(s->primary_bus, &virtio_iommu_ops, s); } else { @@ -1581,7 +1676,6 @@ static void virtio_iommu_memory_region_class_init(ObjectClass *klass, imrc->replay = virtio_iommu_replay; imrc->notify_flag_changed = virtio_iommu_notify_flag_changed; imrc->iommu_set_page_size_mask = virtio_iommu_set_page_size_mask; - imrc->iommu_set_iova_ranges = virtio_iommu_set_iova_ranges; } static const TypeInfo virtio_iommu_info = { diff --git a/include/exec/gdbstub.h b/include/exec/gdbstub.h index eb14b91..1bd2c4e 100644 --- a/include/exec/gdbstub.h +++ b/include/exec/gdbstub.h @@ -1,15 +1,6 @@ #ifndef GDBSTUB_H #define GDBSTUB_H -#define DEFAULT_GDBSTUB_PORT "1234" - -/* GDB breakpoint/watchpoint types */ -#define GDB_BREAKPOINT_SW 0 -#define GDB_BREAKPOINT_HW 1 -#define GDB_WATCHPOINT_WRITE 2 -#define GDB_WATCHPOINT_READ 3 -#define GDB_WATCHPOINT_ACCESS 4 - typedef struct GDBFeature { const char *xmlname; const char *xml; @@ -144,4 +135,4 @@ void gdb_set_stop_cpu(CPUState *cpu); /* in gdbstub-xml.c, generated by scripts/feature_to_c.py */ extern const GDBFeature gdb_static_features[]; -#endif +#endif /* GDBSTUB_H */ diff --git a/include/exec/memory.h b/include/exec/memory.h index 2d7c278..0903513 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -530,26 +530,6 @@ struct IOMMUMemoryRegionClass { int (*iommu_set_page_size_mask)(IOMMUMemoryRegion *iommu, uint64_t page_size_mask, Error **errp); - /** - * @iommu_set_iova_ranges: - * - * Propagate information about the usable IOVA ranges for a given IOMMU - * memory region. Used for example to propagate host physical device - * reserved memory region constraints to the virtual IOMMU. - * - * Optional method: if this method is not provided, then the default IOVA - * aperture is used. - * - * @iommu: the IOMMUMemoryRegion - * - * @iova_ranges: list of ordered IOVA ranges (at least one range) - * - * Returns 0 on success, or a negative error. In case of failure, the error - * object must be created. - */ - int (*iommu_set_iova_ranges)(IOMMUMemoryRegion *iommu, - GList *iova_ranges, - Error **errp); }; typedef struct RamDiscardListener RamDiscardListener; @@ -1952,18 +1932,6 @@ int memory_region_iommu_set_page_size_mask(IOMMUMemoryRegion *iommu_mr, Error **errp); /** - * memory_region_iommu_set_iova_ranges - Set the usable IOVA ranges - * for a given IOMMU MR region - * - * @iommu: IOMMU memory region - * @iova_ranges: list of ordered IOVA ranges (at least one range) - * @errp: pointer to Error*, to store an error if it happens. - */ -int memory_region_iommu_set_iova_ranges(IOMMUMemoryRegion *iommu, - GList *iova_ranges, - Error **errp); - -/** * memory_region_name: get a memory region's name * * Returns the string that was used to initialize the memory region. diff --git a/include/gdbstub/enums.h b/include/gdbstub/enums.h new file mode 100644 index 0000000..c4d54a1 --- /dev/null +++ b/include/gdbstub/enums.h @@ -0,0 +1,21 @@ +/* + * gdbstub enums + * + * Copyright (c) 2024 Linaro Ltd + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef GDBSTUB_ENUMS_H +#define GDBSTUB_ENUMS_H + +#define DEFAULT_GDBSTUB_PORT "1234" + +/* GDB breakpoint/watchpoint types */ +#define GDB_BREAKPOINT_SW 0 +#define GDB_BREAKPOINT_HW 1 +#define GDB_WATCHPOINT_WRITE 2 +#define GDB_WATCHPOINT_READ 3 +#define GDB_WATCHPOINT_ACCESS 4 + +#endif /* GDBSTUB_ENUMS_H */ diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index 7fa0a69..1eb05c2 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -292,6 +292,8 @@ struct IntelIOMMUState { /* list of registered notifiers */ QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers; + GHashTable *vtd_host_iommu_dev; /* HostIOMMUDevice */ + /* interrupt remapping */ bool intr_enabled; /* Whether guest enabled IR */ dma_addr_t intr_root; /* Interrupt remapping table pointer */ diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index eaa3fc9..eb26cac 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -3,6 +3,7 @@ #include "exec/memory.h" #include "sysemu/dma.h" +#include "sysemu/host_iommu_device.h" /* PCI includes legacy ISA access. */ #include "hw/isa/isa.h" @@ -383,10 +384,45 @@ typedef struct PCIIOMMUOps { * * @devfn: device and function number */ - AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn); + AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn); + /** + * @set_iommu_device: attach a HostIOMMUDevice to a vIOMMU + * + * Optional callback, if not implemented in vIOMMU, then vIOMMU can't + * retrieve host information from the associated HostIOMMUDevice. + * + * @bus: the #PCIBus of the PCI device. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @devfn: device and function number of the PCI device. + * + * @dev: the #HostIOMMUDevice to attach. + * + * @errp: pass an Error out only when return false + * + * Returns: true if HostIOMMUDevice is attached or else false with errp set. + */ + bool (*set_iommu_device)(PCIBus *bus, void *opaque, int devfn, + HostIOMMUDevice *dev, Error **errp); + /** + * @unset_iommu_device: detach a HostIOMMUDevice from a vIOMMU + * + * Optional callback. + * + * @bus: the #PCIBus of the PCI device. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @devfn: device and function number of the PCI device. + */ + void (*unset_iommu_device)(PCIBus *bus, void *opaque, int devfn); } PCIIOMMUOps; AddressSpace *pci_device_iommu_address_space(PCIDevice *dev); +bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod, + Error **errp); +void pci_device_unset_iommu_device(PCIDevice *dev); /** * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus diff --git a/include/hw/riscv/virt.h b/include/hw/riscv/virt.h index 3db8391..c0dc41f 100644 --- a/include/hw/riscv/virt.h +++ b/include/hw/riscv/virt.h @@ -118,6 +118,7 @@ enum { #define FDT_PLIC_ADDR_CELLS 0 #define FDT_PLIC_INT_CELLS 1 #define FDT_APLIC_INT_CELLS 2 +#define FDT_APLIC_ADDR_CELLS 0 #define FDT_IMSIC_INT_CELLS 0 #define FDT_MAX_INT_CELLS 2 #define FDT_MAX_INT_MAP_WIDTH (FDT_PCI_ADDR_CELLS + FDT_PCI_INT_CELLS + \ diff --git a/include/hw/s390x/s390-ccw.h b/include/hw/s390x/s390-ccw.h index 2c807ee..2e0a709 100644 --- a/include/hw/s390x/s390-ccw.h +++ b/include/hw/s390x/s390-ccw.h @@ -31,7 +31,7 @@ struct S390CCWDevice { struct S390CCWDeviceClass { CCWDeviceClass parent_class; - void (*realize)(S390CCWDevice *dev, char *sysfsdev, Error **errp); + bool (*realize)(S390CCWDevice *dev, char *sysfsdev, Error **errp); void (*unrealize)(S390CCWDevice *dev); IOInstEnding (*handle_request) (SubchDev *sch); int (*handle_halt) (SubchDev *sch); diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 4cb1ab8..e8ddf92 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -31,6 +31,8 @@ #endif #include "sysemu/sysemu.h" #include "hw/vfio/vfio-container-base.h" +#include "sysemu/host_iommu_device.h" +#include "sysemu/iommufd.h" #define VFIO_MSG_PREFIX "vfio %s: " @@ -82,6 +84,8 @@ typedef struct VFIOContainer { QLIST_HEAD(, VFIOGroup) group_list; } VFIOContainer; +OBJECT_DECLARE_SIMPLE_TYPE(VFIOContainer, VFIO_IOMMU_LEGACY); + typedef struct VFIOHostDMAWindow { hwaddr min_iova; hwaddr max_iova; @@ -97,6 +101,8 @@ typedef struct VFIOIOMMUFDContainer { uint32_t ioas_id; } VFIOIOMMUFDContainer; +OBJECT_DECLARE_SIMPLE_TYPE(VFIOIOMMUFDContainer, VFIO_IOMMU_IOMMUFD); + typedef struct VFIODeviceOps VFIODeviceOps; typedef struct VFIODevice { @@ -125,6 +131,7 @@ typedef struct VFIODevice { OnOffAuto pre_copy_dirty_page_tracking; bool dirty_pages_supported; bool dirty_tracking; + HostIOMMUDevice *hiod; int devid; IOMMUFDBackend *iommufd; } VFIODevice; @@ -171,6 +178,10 @@ typedef struct VFIOGroup { bool ram_block_discard_allowed; } VFIOGroup; +#define TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO TYPE_HOST_IOMMU_DEVICE "-legacy-vfio" +#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO \ + TYPE_HOST_IOMMU_DEVICE_IOMMUFD "-vfio" + typedef struct VFIODMABuf { QemuDmaBuf *buf; uint32_t pos_x, pos_y, pos_updates; @@ -199,10 +210,8 @@ typedef struct VFIODisplay { VFIOAddressSpace *vfio_get_address_space(AddressSpace *as); void vfio_put_address_space(VFIOAddressSpace *space); - -/* SPAPR specific */ -int vfio_spapr_container_init(VFIOContainer *container, Error **errp); -void vfio_spapr_container_deinit(VFIOContainer *container); +void vfio_address_space_insert(VFIOAddressSpace *space, + VFIOContainerBase *bcontainer); void vfio_disable_irqindex(VFIODevice *vbasedev, int index); void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index); @@ -283,4 +292,5 @@ bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp); void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp); void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops, DeviceState *dev, bool ram_discard); +int vfio_device_get_aw_bits(VFIODevice *vdev); #endif /* HW_VFIO_VFIO_COMMON_H */ diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h index 2776481..419e45e 100644 --- a/include/hw/vfio/vfio-container-base.h +++ b/include/hw/vfio/vfio-container-base.h @@ -34,7 +34,7 @@ typedef struct VFIOAddressSpace { * This is the base object for vfio container backends */ typedef struct VFIOContainerBase { - const VFIOIOMMUClass *ops; + Object parent; VFIOAddressSpace *space; MemoryListener listener; Error *error; @@ -86,28 +86,18 @@ int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp); -void vfio_container_init(VFIOContainerBase *bcontainer, - VFIOAddressSpace *space, - const VFIOIOMMUClass *ops); -void vfio_container_destroy(VFIOContainerBase *bcontainer); - - #define TYPE_VFIO_IOMMU "vfio-iommu" #define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy" #define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr" #define TYPE_VFIO_IOMMU_IOMMUFD TYPE_VFIO_IOMMU "-iommufd" -/* - * VFIOContainerBase is not an abstract QOM object because it felt - * unnecessary to expose all the IOMMU backends to the QEMU machine - * and human interface. However, we can still abstract the IOMMU - * backend handlers using a QOM interface class. This provides more - * flexibility when referencing the various implementations. - */ -DECLARE_CLASS_CHECKERS(VFIOIOMMUClass, VFIO_IOMMU, TYPE_VFIO_IOMMU) +OBJECT_DECLARE_TYPE(VFIOContainerBase, VFIOIOMMUClass, VFIO_IOMMU) struct VFIOIOMMUClass { - InterfaceClass parent_class; + ObjectClass parent_class; + + /* Properties */ + const char *hiod_typename; /* basic feature */ bool (*setup)(VFIOContainerBase *bcontainer, Error **errp); diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h index 83a52cc..bdb3da7 100644 --- a/include/hw/virtio/virtio-iommu.h +++ b/include/hw/virtio/virtio-iommu.h @@ -25,6 +25,7 @@ #include "hw/pci/pci.h" #include "qom/object.h" #include "qapi/qapi-types-virtio.h" +#include "sysemu/host_iommu_device.h" #define TYPE_VIRTIO_IOMMU "virtio-iommu-device" #define TYPE_VIRTIO_IOMMU_PCI "virtio-iommu-pci" @@ -57,6 +58,7 @@ struct VirtIOIOMMU { struct virtio_iommu_config config; uint64_t features; GHashTable *as_by_busptr; + GHashTable *host_iommu_devices; IOMMUPciBus *iommu_pcibus_by_bus_num[PCI_BUS_MAX]; PCIBus *primary_bus; ReservedRegion *prop_resv_regions; diff --git a/include/qemu/qemu-plugin.h b/include/qemu/qemu-plugin.h index 95703d8..c71c705 100644 --- a/include/qemu/qemu-plugin.h +++ b/include/qemu/qemu-plugin.h @@ -661,6 +661,33 @@ void qemu_plugin_register_vcpu_mem_inline_per_vcpu( qemu_plugin_u64 entry, uint64_t imm); +/** + * qemu_plugin_request_time_control() - request the ability to control time + * + * This grants the plugin the ability to control system time. Only one + * plugin can control time so if multiple plugins request the ability + * all but the first will fail. + * + * Returns an opaque handle or NULL if fails + */ +QEMU_PLUGIN_API +const void *qemu_plugin_request_time_control(void); + +/** + * qemu_plugin_update_ns() - update system emulation time + * @handle: opaque handle returned by qemu_plugin_request_time_control() + * @time: time in nanoseconds + * + * This allows an appropriately authorised plugin (i.e. holding the + * time control handle) to move system time forward to @time. For + * user-mode emulation the time is not changed by this as all reported + * time comes from the host kernel. + * + * Start time is 0. + */ +QEMU_PLUGIN_API +void qemu_plugin_update_ns(const void *handle, int64_t time); + typedef void (*qemu_plugin_vcpu_syscall_cb_t)(qemu_plugin_id_t id, unsigned int vcpu_index, int64_t num, uint64_t a1, uint64_t a2, diff --git a/include/qemu/range.h b/include/qemu/range.h index 205e1da..4ce694a 100644 --- a/include/qemu/range.h +++ b/include/qemu/range.h @@ -20,6 +20,8 @@ #ifndef QEMU_RANGE_H #define QEMU_RANGE_H +#include "qemu/bitops.h" + /* * Operations on 64 bit address ranges. * Notes: @@ -217,6 +219,15 @@ static inline int ranges_overlap(uint64_t first1, uint64_t len1, return !(last2 < first1 || last1 < first2); } +/* Get highest non-zero bit position of a range */ +static inline int range_get_last_bit(Range *range) +{ + if (range_is_empty(range)) { + return -1; + } + return 63 - clz64(range->upb); +} + /* * Return -1 if @a < @b, 1 @a > @b, and 0 if they touch or overlap. * Both @a and @b must not be empty. diff --git a/include/qemu/timer.h b/include/qemu/timer.h index 9a366e5..5ce83c7 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -245,6 +245,21 @@ bool qemu_clock_run_timers(QEMUClockType type); */ bool qemu_clock_run_all_timers(void); +/** + * qemu_clock_advance_virtual_time(): advance the virtual time tick + * @target_ns: target time in nanoseconds + * + * This function is used where the control of the flow of time has + * been delegated to outside the clock subsystem (be it qtest, icount + * or some other external source). You can ask the clock system to + * return @early at the first expired timer. + * + * Time can only move forward, attempts to reverse time would lead to + * an error. + * + * Returns: new virtual time. + */ +int64_t qemu_clock_advance_virtual_time(int64_t target_ns); /* * QEMUTimerList diff --git a/include/sysemu/accel-ops.h b/include/sysemu/accel-ops.h index ef91fc2..a088672 100644 --- a/include/sysemu/accel-ops.h +++ b/include/sysemu/accel-ops.h @@ -20,7 +20,12 @@ typedef struct AccelOpsClass AccelOpsClass; DECLARE_CLASS_CHECKERS(AccelOpsClass, ACCEL_OPS, TYPE_ACCEL_OPS) -/* cpus.c operations interface */ +/** + * struct AccelOpsClass - accelerator interfaces + * + * This structure is used to abstract accelerator differences from the + * core CPU code. Not all have to be implemented. + */ struct AccelOpsClass { /*< private >*/ ObjectClass parent_class; @@ -44,7 +49,18 @@ struct AccelOpsClass { void (*handle_interrupt)(CPUState *cpu, int mask); + /** + * @get_virtual_clock: fetch virtual clock + * @set_virtual_clock: set virtual clock + * + * These allow the timer subsystem to defer to the accelerator to + * fetch time. The set function is needed if the accelerator wants + * to track the changes to time as the timer is warped through + * various timer events. + */ int64_t (*get_virtual_clock)(void); + void (*set_virtual_clock)(int64_t time); + int64_t (*get_elapsed_ticks)(void); /* gdbstub hooks */ diff --git a/include/sysemu/cpu-timers.h b/include/sysemu/cpu-timers.h index d86738a..7bfa960 100644 --- a/include/sysemu/cpu-timers.h +++ b/include/sysemu/cpu-timers.h @@ -96,8 +96,9 @@ int64_t cpu_get_clock(void); void qemu_timer_notify_cb(void *opaque, QEMUClockType type); -/* get the VIRTUAL clock and VM elapsed ticks via the cpus accel interface */ +/* get/set VIRTUAL clock and VM elapsed ticks via the cpus accel interface */ int64_t cpus_get_virtual_clock(void); +void cpus_set_virtual_clock(int64_t new_time); int64_t cpus_get_elapsed_ticks(void); #endif /* SYSEMU_CPU_TIMERS_H */ diff --git a/include/sysemu/host_iommu_device.h b/include/sysemu/host_iommu_device.h new file mode 100644 index 0000000..ee6c813 --- /dev/null +++ b/include/sysemu/host_iommu_device.h @@ -0,0 +1,102 @@ +/* + * Host IOMMU device abstract declaration + * + * Copyright (C) 2024 Intel Corporation. + * + * Authors: Zhenzhong Duan <zhenzhong.duan@intel.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#ifndef HOST_IOMMU_DEVICE_H +#define HOST_IOMMU_DEVICE_H + +#include "qom/object.h" +#include "qapi/error.h" + +/** + * struct HostIOMMUDeviceCaps - Define host IOMMU device capabilities. + * + * @type: host platform IOMMU type. + * + * @aw_bits: host IOMMU address width. 0xff if no limitation. + */ +typedef struct HostIOMMUDeviceCaps { + uint32_t type; + uint8_t aw_bits; +} HostIOMMUDeviceCaps; + +#define TYPE_HOST_IOMMU_DEVICE "host-iommu-device" +OBJECT_DECLARE_TYPE(HostIOMMUDevice, HostIOMMUDeviceClass, HOST_IOMMU_DEVICE) + +struct HostIOMMUDevice { + Object parent_obj; + + char *name; + void *agent; /* pointer to agent device, ie. VFIO or VDPA device */ + PCIBus *aliased_bus; + int aliased_devfn; + HostIOMMUDeviceCaps caps; +}; + +/** + * struct HostIOMMUDeviceClass - The base class for all host IOMMU devices. + * + * Different types of host devices (e.g., VFIO or VDPA device) or devices + * with different backend (e.g., VFIO legacy container or IOMMUFD backend) + * will have different implementations of the HostIOMMUDeviceClass. + */ +struct HostIOMMUDeviceClass { + ObjectClass parent_class; + + /** + * @realize: initialize host IOMMU device instance further. + * + * Mandatory callback. + * + * @hiod: pointer to a host IOMMU device instance. + * + * @opaque: pointer to agent device of this host IOMMU device, + * e.g., VFIO base device or VDPA device. + * + * @errp: pass an Error out when realize fails. + * + * Returns: true on success, false on failure. + */ + bool (*realize)(HostIOMMUDevice *hiod, void *opaque, Error **errp); + /** + * @get_cap: check if a host IOMMU device capability is supported. + * + * Optional callback, if not implemented, hint not supporting query + * of @cap. + * + * @hiod: pointer to a host IOMMU device instance. + * + * @cap: capability to check. + * + * @errp: pass an Error out when fails to query capability. + * + * Returns: <0 on failure, 0 if a @cap is unsupported, or else + * 1 or some positive value for some special @cap, + * i.e., HOST_IOMMU_DEVICE_CAP_AW_BITS. + */ + int (*get_cap)(HostIOMMUDevice *hiod, int cap, Error **errp); + /** + * @get_iova_ranges: Return the list of usable iova_ranges along with + * @hiod Host IOMMU device + * + * @hiod: handle to the host IOMMU device + * @errp: error handle + */ + GList* (*get_iova_ranges)(HostIOMMUDevice *hiod, Error **errp); +}; + +/* + * Host IOMMU device capability list. + */ +#define HOST_IOMMU_DEVICE_CAP_IOMMU_TYPE 0 +#define HOST_IOMMU_DEVICE_CAP_AW_BITS 1 + +#define HOST_IOMMU_DEVICE_CAP_AW_BITS_MAX 64 +#endif diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h index 293bfbe..9edfec6 100644 --- a/include/sysemu/iommufd.h +++ b/include/sysemu/iommufd.h @@ -1,9 +1,23 @@ +/* + * iommufd container backend declaration + * + * Copyright (C) 2024 Intel Corporation. + * Copyright Red Hat, Inc. 2024 + * + * Authors: Yi Liu <yi.l.liu@intel.com> + * Eric Auger <eric.auger@redhat.com> + * Zhenzhong Duan <zhenzhong.duan@intel.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + #ifndef SYSEMU_IOMMUFD_H #define SYSEMU_IOMMUFD_H #include "qom/object.h" #include "exec/hwaddr.h" #include "exec/cpu-common.h" +#include "sysemu/host_iommu_device.h" #define TYPE_IOMMUFD_BACKEND "iommufd" OBJECT_DECLARE_TYPE(IOMMUFDBackend, IOMMUFDBackendClass, IOMMUFD_BACKEND) @@ -33,4 +47,9 @@ int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, ram_addr_t size, void *vaddr, bool readonly); int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, ram_addr_t size); +bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid, + uint32_t *type, void *data, uint32_t len, + Error **errp); + +#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd" #endif diff --git a/include/sysemu/qtest.h b/include/sysemu/qtest.h index b5d5fd3..c161d75 100644 --- a/include/sysemu/qtest.h +++ b/include/sysemu/qtest.h @@ -34,8 +34,6 @@ void qtest_server_init(const char *qtest_chrdev, const char *qtest_log, Error ** void qtest_server_set_send_handler(void (*send)(void *, const char *), void *opaque); void qtest_server_inproc_recv(void *opaque, const char *buf); - -int64_t qtest_get_virtual_clock(void); #endif #endif diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c index 45ee3a9..f601d06 100644 --- a/monitor/hmp-cmds.c +++ b/monitor/hmp-cmds.c @@ -15,8 +15,9 @@ #include "qemu/osdep.h" #include "exec/address-spaces.h" -#include "exec/gdbstub.h" #include "exec/ioport.h" +#include "exec/gdbstub.h" +#include "gdbstub/enums.h" #include "monitor/hmp.h" #include "qemu/help_option.h" #include "monitor/monitor-internal.h" diff --git a/plugins/api.c b/plugins/api.c index 5a0a7f8..2ff13d0 100644 --- a/plugins/api.c +++ b/plugins/api.c @@ -39,6 +39,7 @@ #include "qemu/main-loop.h" #include "qemu/plugin.h" #include "qemu/log.h" +#include "qemu/timer.h" #include "tcg/tcg.h" #include "exec/exec-all.h" #include "exec/gdbstub.h" @@ -46,6 +47,8 @@ #include "disas/disas.h" #include "plugin.h" #ifndef CONFIG_USER_ONLY +#include "qapi/error.h" +#include "migration/blocker.h" #include "exec/ram_addr.h" #include "qemu/plugin-memory.h" #include "hw/boards.h" @@ -507,7 +510,7 @@ static GArray *create_register_handles(GArray *gdbstub_regs) } /* Create a record for the plugin */ - desc.handle = GINT_TO_POINTER(grd->gdb_reg); + desc.handle = GINT_TO_POINTER(grd->gdb_reg + 1); desc.name = g_intern_string(grd->name); desc.feature = g_intern_string(grd->feature_name); g_array_append_val(find_data, desc); @@ -528,7 +531,7 @@ int qemu_plugin_read_register(struct qemu_plugin_register *reg, GByteArray *buf) { g_assert(current_cpu); - return gdb_read_register(current_cpu, buf, GPOINTER_TO_INT(reg)); + return gdb_read_register(current_cpu, buf, GPOINTER_TO_INT(reg) - 1); } struct qemu_plugin_scoreboard *qemu_plugin_scoreboard_new(size_t element_size) @@ -583,3 +586,45 @@ uint64_t qemu_plugin_u64_sum(qemu_plugin_u64 entry) } return total; } + +/* + * Time control + */ +static bool has_control; +#ifdef CONFIG_SOFTMMU +static Error *migration_blocker; +#endif + +const void *qemu_plugin_request_time_control(void) +{ + if (!has_control) { + has_control = true; +#ifdef CONFIG_SOFTMMU + error_setg(&migration_blocker, + "TCG plugin time control does not support migration"); + migrate_add_blocker(&migration_blocker, NULL); +#endif + return &has_control; + } + return NULL; +} + +#ifdef CONFIG_SOFTMMU +static void advance_virtual_time__async(CPUState *cpu, run_on_cpu_data data) +{ + int64_t new_time = data.host_ulong; + qemu_clock_advance_virtual_time(new_time); +} +#endif + +void qemu_plugin_update_ns(const void *handle, int64_t new_time) +{ +#ifdef CONFIG_SOFTMMU + if (handle == &has_control) { + /* Need to execute out of cpu_exec, so bql can be locked. */ + async_run_on_cpu(current_cpu, + advance_virtual_time__async, + RUN_ON_CPU_HOST_ULONG(new_time)); + } +#endif +} diff --git a/plugins/core.c b/plugins/core.c index badede2..9d737d8 100644 --- a/plugins/core.c +++ b/plugins/core.c @@ -589,7 +589,7 @@ void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr, switch (cb->type) { case PLUGIN_CB_MEM_REGULAR: - if (rw && cb->regular.rw) { + if (rw & cb->regular.rw) { cb->regular.f.vcpu_mem(cpu->cpu_index, make_plugin_meminfo(oi, rw), vaddr, cb->regular.userp); @@ -597,7 +597,7 @@ void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr, break; case PLUGIN_CB_INLINE_ADD_U64: case PLUGIN_CB_INLINE_STORE_U64: - if (rw && cb->inline_insn.rw) { + if (rw & cb->inline_insn.rw) { exec_inline_op(cb->type, &cb->inline_insn, cpu->cpu_index); } break; diff --git a/plugins/qemu-plugins.symbols b/plugins/qemu-plugins.symbols index aa0a77a..ca773d8 100644 --- a/plugins/qemu-plugins.symbols +++ b/plugins/qemu-plugins.symbols @@ -38,6 +38,7 @@ qemu_plugin_register_vcpu_tb_exec_cond_cb; qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu; qemu_plugin_register_vcpu_tb_trans_cb; + qemu_plugin_request_time_control; qemu_plugin_reset; qemu_plugin_scoreboard_free; qemu_plugin_scoreboard_find; @@ -51,5 +52,6 @@ qemu_plugin_u64_set; qemu_plugin_u64_sum; qemu_plugin_uninstall; + qemu_plugin_update_ns; qemu_plugin_vcpu_for_each; }; diff --git a/stubs/cpus-get-virtual-clock.c b/stubs/cpus-virtual-clock.c index fd447d5..af7c1a1 100644 --- a/stubs/cpus-get-virtual-clock.c +++ b/stubs/cpus-virtual-clock.c @@ -6,3 +6,8 @@ int64_t cpus_get_virtual_clock(void) { return cpu_get_clock(); } + +void cpus_set_virtual_clock(int64_t new_time) +{ + /* do nothing */ +} diff --git a/stubs/meson.build b/stubs/meson.build index f15b48d..772a3e8 100644 --- a/stubs/meson.build +++ b/stubs/meson.build @@ -29,7 +29,7 @@ endif if have_block or have_ga stub_ss.add(files('replay-tools.c')) # stubs for hooks in util/main-loop.c, util/async.c etc. - stub_ss.add(files('cpus-get-virtual-clock.c')) + stub_ss.add(files('cpus-virtual-clock.c')) stub_ss.add(files('icount.c')) stub_ss.add(files('graph-lock.c')) if linux_io_uring.found() diff --git a/system/cpus.c b/system/cpus.c index f8fa78f..d3640c9 100644 --- a/system/cpus.c +++ b/system/cpus.c @@ -230,6 +230,17 @@ int64_t cpus_get_virtual_clock(void) } /* + * Signal the new virtual time to the accelerator. This is only needed + * by accelerators that need to track the changes as we warp time. + */ +void cpus_set_virtual_clock(int64_t new_time) +{ + if (cpus_accel && cpus_accel->set_virtual_clock) { + cpus_accel->set_virtual_clock(new_time); + } +} + +/* * return the time elapsed in VM between vm_start and vm_stop. Unless * icount is active, cpus_get_elapsed_ticks() uses units of the host CPU cycle * counter. diff --git a/system/memory.c b/system/memory.c index 47c600d..2d69521 100644 --- a/system/memory.c +++ b/system/memory.c @@ -1914,19 +1914,6 @@ int memory_region_iommu_set_page_size_mask(IOMMUMemoryRegion *iommu_mr, return ret; } -int memory_region_iommu_set_iova_ranges(IOMMUMemoryRegion *iommu_mr, - GList *iova_ranges, - Error **errp) -{ - IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr); - int ret = 0; - - if (imrc->iommu_set_iova_ranges) { - ret = imrc->iommu_set_iova_ranges(iommu_mr, iova_ranges, errp); - } - return ret; -} - int memory_region_register_iommu_notifier(MemoryRegion *mr, IOMMUNotifier *n, Error **errp) { diff --git a/system/qtest.c b/system/qtest.c index 507a358..12703a2 100644 --- a/system/qtest.c +++ b/system/qtest.c @@ -325,38 +325,6 @@ static void qtest_irq_handler(void *opaque, int n, int level) } } -static int64_t qtest_clock_counter; - -int64_t qtest_get_virtual_clock(void) -{ - return qatomic_read_i64(&qtest_clock_counter); -} - -static void qtest_set_virtual_clock(int64_t count) -{ - qatomic_set_i64(&qtest_clock_counter, count); -} - -static void qtest_clock_warp(int64_t dest) -{ - int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - AioContext *aio_context; - assert(qtest_enabled()); - aio_context = qemu_get_aio_context(); - while (clock < dest) { - int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL, - QEMU_TIMER_ATTR_ALL); - int64_t warp = qemu_soonest_timeout(dest - clock, deadline); - - qtest_set_virtual_clock(qtest_get_virtual_clock() + warp); - - qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL); - timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]); - clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - } - qemu_clock_notify(QEMU_CLOCK_VIRTUAL); -} - static bool (*process_command_cb)(CharBackend *chr, gchar **words); void qtest_set_command_cb(bool (*pc_cb)(CharBackend *chr, gchar **words)) @@ -751,7 +719,8 @@ static void qtest_process_command(CharBackend *chr, gchar **words) ns = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL, QEMU_TIMER_ATTR_ALL); } - qtest_clock_warp(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + ns); + qemu_clock_advance_virtual_time( + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + ns); qtest_send_prefix(chr); qtest_sendf(chr, "OK %"PRIi64"\n", (int64_t)qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); @@ -777,7 +746,7 @@ static void qtest_process_command(CharBackend *chr, gchar **words) g_assert(words[1]); ret = qemu_strtoi64(words[1], NULL, 0, &ns); g_assert(ret == 0); - qtest_clock_warp(ns); + qemu_clock_advance_virtual_time(ns); qtest_send_prefix(chr); qtest_sendf(chr, "OK %"PRIi64"\n", (int64_t)qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); diff --git a/system/vl.c b/system/vl.c index a3eede5..cfcb674 100644 --- a/system/vl.c +++ b/system/vl.c @@ -68,6 +68,7 @@ #include "sysemu/numa.h" #include "sysemu/hostmem.h" #include "exec/gdbstub.h" +#include "gdbstub/enums.h" #include "qemu/timer.h" #include "chardev/char.h" #include "qemu/bitmap.h" diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c index 45e2218..ef9bc42 100644 --- a/target/arm/hvf/hvf.c +++ b/target/arm/hvf/hvf.c @@ -33,7 +33,7 @@ #include "trace/trace-target_arm_hvf.h" #include "migration/vmstate.h" -#include "exec/gdbstub.h" +#include "gdbstub/enums.h" #define MDSCR_EL1_SS_SHIFT 0 #define MDSCR_EL1_MDE_SHIFT 15 diff --git a/target/arm/hyp_gdbstub.c b/target/arm/hyp_gdbstub.c index ebde289..f120d55 100644 --- a/target/arm/hyp_gdbstub.c +++ b/target/arm/hyp_gdbstub.c @@ -12,7 +12,7 @@ #include "qemu/osdep.h" #include "cpu.h" #include "internals.h" -#include "exec/gdbstub.h" +#include "gdbstub/enums.h" /* Maximum and current break/watch point counts */ int max_hw_bps, max_hw_wps; diff --git a/target/arm/kvm.c b/target/arm/kvm.c index 7cf5cf3..70f79ed 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -31,7 +31,7 @@ #include "hw/pci/pci.h" #include "exec/memattrs.h" #include "exec/address-spaces.h" -#include "exec/gdbstub.h" +#include "gdbstub/enums.h" #include "hw/boards.h" #include "hw/irq.h" #include "qapi/visitor.h" diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 7ad8072..dd8b0f3 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -38,7 +38,7 @@ #include "hyperv.h" #include "hyperv-proto.h" -#include "exec/gdbstub.h" +#include "gdbstub/enums.h" #include "qemu/host-utils.h" #include "qemu/main-loop.h" #include "qemu/ratelimit.h" diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 005f223..2c39322 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -39,7 +39,7 @@ #include "migration/qemu-file-types.h" #include "sysemu/watchdog.h" #include "trace.h" -#include "exec/gdbstub.h" +#include "gdbstub/enums.h" #include "exec/memattrs.h" #include "exec/ram_addr.h" #include "sysemu/hostmem.h" diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index 69a08e8..a2640cf 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -1779,7 +1779,9 @@ static int priv_spec_from_str(const char *priv_spec_str) { int priv_version = -1; - if (!g_strcmp0(priv_spec_str, PRIV_VER_1_12_0_STR)) { + if (!g_strcmp0(priv_spec_str, PRIV_VER_1_13_0_STR)) { + priv_version = PRIV_VERSION_1_13_0; + } else if (!g_strcmp0(priv_spec_str, PRIV_VER_1_12_0_STR)) { priv_version = PRIV_VERSION_1_12_0; } else if (!g_strcmp0(priv_spec_str, PRIV_VER_1_11_0_STR)) { priv_version = PRIV_VERSION_1_11_0; @@ -1790,7 +1792,7 @@ static int priv_spec_from_str(const char *priv_spec_str) return priv_version; } -static const char *priv_spec_to_str(int priv_version) +const char *priv_spec_to_str(int priv_version) { switch (priv_version) { case PRIV_VERSION_1_10_0: @@ -1799,6 +1801,8 @@ static const char *priv_spec_to_str(int priv_version) return PRIV_VER_1_11_0_STR; case PRIV_VERSION_1_12_0: return PRIV_VER_1_12_0_STR; + case PRIV_VERSION_1_13_0: + return PRIV_VER_1_13_0_STR; default: return NULL; } @@ -2246,6 +2250,402 @@ RISCVCPUProfile *riscv_profiles[] = { NULL, }; +static RISCVCPUImpliedExtsRule RVA_IMPLIED = { + .is_misa = true, + .ext = RVA, + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zalrsc), CPU_CFG_OFFSET(ext_zaamo), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule RVD_IMPLIED = { + .is_misa = true, + .ext = RVD, + .implied_misa_exts = RVF, + .implied_multi_exts = { RISCV_IMPLIED_EXTS_RULE_END }, +}; + +static RISCVCPUImpliedExtsRule RVF_IMPLIED = { + .is_misa = true, + .ext = RVF, + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zicsr), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule RVM_IMPLIED = { + .is_misa = true, + .ext = RVM, + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zmmul), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule RVV_IMPLIED = { + .is_misa = true, + .ext = RVV, + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zve64d), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZCB_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zcb), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zca), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZCD_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zcd), + .implied_misa_exts = RVD, + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zca), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZCE_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zce), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zcb), CPU_CFG_OFFSET(ext_zcmp), + CPU_CFG_OFFSET(ext_zcmt), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZCF_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zcf), + .implied_misa_exts = RVF, + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zca), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZCMP_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zcmp), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zca), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZCMT_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zcmt), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zca), CPU_CFG_OFFSET(ext_zicsr), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZDINX_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zdinx), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zfinx), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZFA_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zfa), + .implied_misa_exts = RVF, + .implied_multi_exts = { RISCV_IMPLIED_EXTS_RULE_END }, +}; + +static RISCVCPUImpliedExtsRule ZFBFMIN_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zfbfmin), + .implied_misa_exts = RVF, + .implied_multi_exts = { RISCV_IMPLIED_EXTS_RULE_END }, +}; + +static RISCVCPUImpliedExtsRule ZFH_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zfh), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zfhmin), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZFHMIN_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zfhmin), + .implied_misa_exts = RVF, + .implied_multi_exts = { RISCV_IMPLIED_EXTS_RULE_END }, +}; + +static RISCVCPUImpliedExtsRule ZFINX_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zfinx), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zicsr), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZHINX_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zhinx), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zhinxmin), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZHINXMIN_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zhinxmin), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zfinx), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZICNTR_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zicntr), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zicsr), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZIHPM_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zihpm), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zicsr), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZK_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zk), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zkn), CPU_CFG_OFFSET(ext_zkr), + CPU_CFG_OFFSET(ext_zkt), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZKN_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zkn), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zbkb), CPU_CFG_OFFSET(ext_zbkc), + CPU_CFG_OFFSET(ext_zbkx), CPU_CFG_OFFSET(ext_zkne), + CPU_CFG_OFFSET(ext_zknd), CPU_CFG_OFFSET(ext_zknh), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZKS_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zks), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zbkb), CPU_CFG_OFFSET(ext_zbkc), + CPU_CFG_OFFSET(ext_zbkx), CPU_CFG_OFFSET(ext_zksed), + CPU_CFG_OFFSET(ext_zksh), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVBB_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zvbb), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zvkb), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVE32F_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zve32f), + .implied_misa_exts = RVF, + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zve32x), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVE32X_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zve32x), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zicsr), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVE64D_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zve64d), + .implied_misa_exts = RVD, + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zve64f), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVE64F_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zve64f), + .implied_misa_exts = RVF, + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zve32f), CPU_CFG_OFFSET(ext_zve64x), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVE64X_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zve64x), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zve32x), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVFBFMIN_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zvfbfmin), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zve32f), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVFBFWMA_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zvfbfwma), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zvfbfmin), CPU_CFG_OFFSET(ext_zfbfmin), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVFH_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zvfh), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zvfhmin), CPU_CFG_OFFSET(ext_zfhmin), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVFHMIN_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zvfhmin), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zve32f), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVKN_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zvkn), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zvkned), CPU_CFG_OFFSET(ext_zvknhb), + CPU_CFG_OFFSET(ext_zvkb), CPU_CFG_OFFSET(ext_zvkt), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVKNC_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zvknc), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zvkn), CPU_CFG_OFFSET(ext_zvbc), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVKNG_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zvkng), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zvkn), CPU_CFG_OFFSET(ext_zvkg), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVKNHB_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zvknhb), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zve64x), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVKS_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zvks), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zvksed), CPU_CFG_OFFSET(ext_zvksh), + CPU_CFG_OFFSET(ext_zvkb), CPU_CFG_OFFSET(ext_zvkt), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVKSC_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zvksc), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zvks), CPU_CFG_OFFSET(ext_zvbc), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVKSG_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zvksg), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zvks), CPU_CFG_OFFSET(ext_zvkg), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +RISCVCPUImpliedExtsRule *riscv_misa_ext_implied_rules[] = { + &RVA_IMPLIED, &RVD_IMPLIED, &RVF_IMPLIED, + &RVM_IMPLIED, &RVV_IMPLIED, NULL +}; + +RISCVCPUImpliedExtsRule *riscv_multi_ext_implied_rules[] = { + &ZCB_IMPLIED, &ZCD_IMPLIED, &ZCE_IMPLIED, + &ZCF_IMPLIED, &ZCMP_IMPLIED, &ZCMT_IMPLIED, + &ZDINX_IMPLIED, &ZFA_IMPLIED, &ZFBFMIN_IMPLIED, + &ZFH_IMPLIED, &ZFHMIN_IMPLIED, &ZFINX_IMPLIED, + &ZHINX_IMPLIED, &ZHINXMIN_IMPLIED, &ZICNTR_IMPLIED, + &ZIHPM_IMPLIED, &ZK_IMPLIED, &ZKN_IMPLIED, + &ZKS_IMPLIED, &ZVBB_IMPLIED, &ZVE32F_IMPLIED, + &ZVE32X_IMPLIED, &ZVE64D_IMPLIED, &ZVE64F_IMPLIED, + &ZVE64X_IMPLIED, &ZVFBFMIN_IMPLIED, &ZVFBFWMA_IMPLIED, + &ZVFH_IMPLIED, &ZVFHMIN_IMPLIED, &ZVKN_IMPLIED, + &ZVKNC_IMPLIED, &ZVKNG_IMPLIED, &ZVKNHB_IMPLIED, + &ZVKS_IMPLIED, &ZVKSC_IMPLIED, &ZVKSG_IMPLIED, + NULL +}; + static Property riscv_cpu_properties[] = { DEFINE_PROP_BOOL("debug", RISCVCPU, cfg.debug, true), diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index 6fe0d71..8774204 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -96,12 +96,14 @@ extern RISCVCPUProfile *riscv_profiles[]; #define PRIV_VER_1_10_0_STR "v1.10.0" #define PRIV_VER_1_11_0_STR "v1.11.0" #define PRIV_VER_1_12_0_STR "v1.12.0" +#define PRIV_VER_1_13_0_STR "v1.13.0" enum { PRIV_VERSION_1_10_0 = 0, PRIV_VERSION_1_11_0, PRIV_VERSION_1_12_0, + PRIV_VERSION_1_13_0, - PRIV_VERSION_LATEST = PRIV_VERSION_1_12_0, + PRIV_VERSION_LATEST = PRIV_VERSION_1_13_0, }; #define VEXT_VERSION_1_00_0 0x00010000 @@ -122,6 +124,29 @@ typedef enum { EXT_STATUS_DIRTY, } RISCVExtStatus; +typedef struct riscv_cpu_implied_exts_rule { +#ifndef CONFIG_USER_ONLY + /* + * Bitmask indicates the rule enabled status for the harts. + * This enhancement is only available in system-mode QEMU, + * as we don't have a good way (e.g. mhartid) to distinguish + * the SMP cores in user-mode QEMU. + */ + unsigned long *enabled; +#endif + /* True if this is a MISA implied rule. */ + bool is_misa; + /* ext is MISA bit if is_misa flag is true, else multi extension offset. */ + const uint32_t ext; + const uint32_t implied_misa_exts; + const uint32_t implied_multi_exts[]; +} RISCVCPUImpliedExtsRule; + +extern RISCVCPUImpliedExtsRule *riscv_misa_ext_implied_rules[]; +extern RISCVCPUImpliedExtsRule *riscv_multi_ext_implied_rules[]; + +#define RISCV_IMPLIED_EXTS_RULE_END -1 + #define MMU_USER_IDX 3 #define MAX_RISCV_PMPS (16) @@ -830,4 +855,5 @@ const char *satp_mode_str(uint8_t satp_mode, bool is_32_bit); /* Implemented in th_csr.c */ void th_register_custom_csrs(RISCVCPU *cpu); +const char *priv_spec_to_str(int priv_version); #endif /* RISCV_CPU_H */ diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h index 74318a9..c257c5e 100644 --- a/target/riscv/cpu_bits.h +++ b/target/riscv/cpu_bits.h @@ -156,6 +156,8 @@ /* 32-bit only */ #define CSR_MSTATUSH 0x310 +#define CSR_MEDELEGH 0x312 +#define CSR_HEDELEGH 0x612 /* Machine Trap Handling */ #define CSR_MSCRATCH 0x340 @@ -315,6 +317,7 @@ #define SMSTATEEN0_CS (1ULL << 0) #define SMSTATEEN0_FCSR (1ULL << 1) #define SMSTATEEN0_JVT (1ULL << 2) +#define SMSTATEEN0_P1P13 (1ULL << 56) #define SMSTATEEN0_HSCONTXT (1ULL << 57) #define SMSTATEEN0_IMSIC (1ULL << 58) #define SMSTATEEN0_AIA (1ULL << 59) @@ -670,6 +673,8 @@ typedef enum RISCVException { RISCV_EXCP_INST_PAGE_FAULT = 0xc, /* since: priv-1.10.0 */ RISCV_EXCP_LOAD_PAGE_FAULT = 0xd, /* since: priv-1.10.0 */ RISCV_EXCP_STORE_PAGE_FAULT = 0xf, /* since: priv-1.10.0 */ + RISCV_EXCP_SW_CHECK = 0x12, /* since: priv-1.13.0 */ + RISCV_EXCP_HW_ERR = 0x13, /* since: priv-1.13.0 */ RISCV_EXCP_INST_GUEST_PAGE_FAULT = 0x14, RISCV_EXCP_LOAD_GUEST_ACCESS_FAULT = 0x15, RISCV_EXCP_VIRT_INSTRUCTION_FAULT = 0x16, @@ -695,7 +700,8 @@ typedef enum RISCVException { #define IRQ_M_EXT 11 #define IRQ_S_GEXT 12 #define IRQ_PMU_OVF 13 -#define IRQ_LOCAL_MAX 16 +#define IRQ_LOCAL_MAX 64 +/* -1 is due to bit zero of hgeip and hgeie being ROZ. */ #define IRQ_LOCAL_GUEST_MAX (TARGET_LONG_BITS - 1) /* mip masks */ diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h index e1e4f32..fb7eebd 100644 --- a/target/riscv/cpu_cfg.h +++ b/target/riscv/cpu_cfg.h @@ -136,6 +136,7 @@ struct RISCVCPUConfig { * TCG always implement/can't be user disabled, * based on spec version. */ + bool has_priv_1_13; bool has_priv_1_12; bool has_priv_1_11; diff --git a/target/riscv/csr.c b/target/riscv/csr.c index 58ef707..432c59d 100644 --- a/target/riscv/csr.c +++ b/target/riscv/csr.c @@ -762,14 +762,18 @@ static RISCVException write_vcsr(CPURISCVState *env, int csrno, } /* User Timers and Counters */ -static target_ulong get_ticks(bool shift) +static target_ulong get_ticks(bool shift, bool instructions) { int64_t val; target_ulong result; #if !defined(CONFIG_USER_ONLY) if (icount_enabled()) { - val = icount_get(); + if (instructions) { + val = icount_get_raw(); + } else { + val = icount_get(); + } } else { val = cpu_get_host_ticks(); } @@ -804,14 +808,14 @@ static RISCVException read_timeh(CPURISCVState *env, int csrno, static RISCVException read_hpmcounter(CPURISCVState *env, int csrno, target_ulong *val) { - *val = get_ticks(false); + *val = get_ticks(false, (csrno == CSR_INSTRET)); return RISCV_EXCP_NONE; } static RISCVException read_hpmcounterh(CPURISCVState *env, int csrno, target_ulong *val) { - *val = get_ticks(true); + *val = get_ticks(true, (csrno == CSR_INSTRETH)); return RISCV_EXCP_NONE; } @@ -875,11 +879,11 @@ static RISCVException write_mhpmcounter(CPURISCVState *env, int csrno, int ctr_idx = csrno - CSR_MCYCLE; PMUCTRState *counter = &env->pmu_ctrs[ctr_idx]; uint64_t mhpmctr_val = val; + bool instr = riscv_pmu_ctr_monitor_instructions(env, ctr_idx); counter->mhpmcounter_val = val; - if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) || - riscv_pmu_ctr_monitor_instructions(env, ctr_idx)) { - counter->mhpmcounter_prev = get_ticks(false); + if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) || instr) { + counter->mhpmcounter_prev = get_ticks(false, instr); if (ctr_idx > 2) { if (riscv_cpu_mxl(env) == MXL_RV32) { mhpmctr_val = mhpmctr_val | @@ -902,12 +906,12 @@ static RISCVException write_mhpmcounterh(CPURISCVState *env, int csrno, PMUCTRState *counter = &env->pmu_ctrs[ctr_idx]; uint64_t mhpmctr_val = counter->mhpmcounter_val; uint64_t mhpmctrh_val = val; + bool instr = riscv_pmu_ctr_monitor_instructions(env, ctr_idx); counter->mhpmcounterh_val = val; mhpmctr_val = mhpmctr_val | (mhpmctrh_val << 32); - if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) || - riscv_pmu_ctr_monitor_instructions(env, ctr_idx)) { - counter->mhpmcounterh_prev = get_ticks(true); + if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) || instr) { + counter->mhpmcounterh_prev = get_ticks(true, instr); if (ctr_idx > 2) { riscv_pmu_setup_timer(env, mhpmctr_val, ctr_idx); } @@ -926,6 +930,7 @@ static RISCVException riscv_pmu_read_ctr(CPURISCVState *env, target_ulong *val, counter->mhpmcounter_prev; target_ulong ctr_val = upper_half ? counter->mhpmcounterh_val : counter->mhpmcounter_val; + bool instr = riscv_pmu_ctr_monitor_instructions(env, ctr_idx); if (get_field(env->mcountinhibit, BIT(ctr_idx))) { /* @@ -946,9 +951,8 @@ static RISCVException riscv_pmu_read_ctr(CPURISCVState *env, target_ulong *val, * The kernel computes the perf delta by subtracting the current value from * the value it initialized previously (ctr_val). */ - if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) || - riscv_pmu_ctr_monitor_instructions(env, ctr_idx)) { - *val = get_ticks(upper_half) - ctr_prev + ctr_val; + if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) || instr) { + *val = get_ticks(upper_half, instr) - ctr_prev + ctr_val; } else { *val = ctr_val; } @@ -1145,7 +1149,14 @@ static RISCVException write_stimecmph(CPURISCVState *env, int csrno, #define VSTOPI_NUM_SRCS 5 -#define LOCAL_INTERRUPTS (~0x1FFF) +/* + * All core local interrupts except the fixed ones 0:12. This macro is for + * virtual interrupts logic so please don't change this to avoid messing up + * the whole support, For reference see AIA spec: `5.3 Interrupt filtering and + * virtual interrupts for supervisor level` and `6.3.2 Virtual interrupts for + * VS level`. + */ +#define LOCAL_INTERRUPTS (~0x1FFFULL) static const uint64_t delegable_ints = S_MODE_INTERRUPTS | VS_MODE_INTERRUPTS | MIP_LCOFIP; @@ -1197,18 +1208,18 @@ static const target_ulong sstatus_v1_10_mask = SSTATUS_SIE | SSTATUS_SPIE | */ /* Bit STIP can be an alias of mip.STIP that's why it's writable in mvip. */ -static const target_ulong mvip_writable_mask = MIP_SSIP | MIP_STIP | MIP_SEIP | +static const uint64_t mvip_writable_mask = MIP_SSIP | MIP_STIP | MIP_SEIP | LOCAL_INTERRUPTS; -static const target_ulong mvien_writable_mask = MIP_SSIP | MIP_SEIP | +static const uint64_t mvien_writable_mask = MIP_SSIP | MIP_SEIP | LOCAL_INTERRUPTS; -static const target_ulong sip_writable_mask = SIP_SSIP | LOCAL_INTERRUPTS; -static const target_ulong hip_writable_mask = MIP_VSSIP; -static const target_ulong hvip_writable_mask = MIP_VSSIP | MIP_VSTIP | +static const uint64_t sip_writable_mask = SIP_SSIP | LOCAL_INTERRUPTS; +static const uint64_t hip_writable_mask = MIP_VSSIP; +static const uint64_t hvip_writable_mask = MIP_VSSIP | MIP_VSTIP | MIP_VSEIP | LOCAL_INTERRUPTS; -static const target_ulong hvien_writable_mask = LOCAL_INTERRUPTS; +static const uint64_t hvien_writable_mask = LOCAL_INTERRUPTS; -static const target_ulong vsip_writable_mask = MIP_VSSIP | LOCAL_INTERRUPTS; +static const uint64_t vsip_writable_mask = MIP_VSSIP | LOCAL_INTERRUPTS; const bool valid_vm_1_10_32[16] = { [VM_1_10_MBARE] = true, @@ -2245,6 +2256,10 @@ static RISCVException write_mstateen0(CPURISCVState *env, int csrno, wr_mask |= SMSTATEEN0_FCSR; } + if (env->priv_ver >= PRIV_VERSION_1_13_0) { + wr_mask |= SMSTATEEN0_P1P13; + } + return write_mstateen(env, csrno, wr_mask, new_val); } @@ -2280,6 +2295,10 @@ static RISCVException write_mstateen0h(CPURISCVState *env, int csrno, { uint64_t wr_mask = SMSTATEEN_STATEEN | SMSTATEEN0_HSENVCFG; + if (env->priv_ver >= PRIV_VERSION_1_13_0) { + wr_mask |= SMSTATEEN0_P1P13; + } + return write_mstateenh(env, csrno, wr_mask, new_val); } @@ -3214,6 +3233,33 @@ static RISCVException write_hedeleg(CPURISCVState *env, int csrno, return RISCV_EXCP_NONE; } +static RISCVException read_hedelegh(CPURISCVState *env, int csrno, + target_ulong *val) +{ + RISCVException ret; + ret = smstateen_acc_ok(env, 0, SMSTATEEN0_P1P13); + if (ret != RISCV_EXCP_NONE) { + return ret; + } + + /* Reserved, now read zero */ + *val = 0; + return RISCV_EXCP_NONE; +} + +static RISCVException write_hedelegh(CPURISCVState *env, int csrno, + target_ulong val) +{ + RISCVException ret; + ret = smstateen_acc_ok(env, 0, SMSTATEEN0_P1P13); + if (ret != RISCV_EXCP_NONE) { + return ret; + } + + /* Reserved, now write ignore */ + return RISCV_EXCP_NONE; +} + static RISCVException rmw_hvien64(CPURISCVState *env, int csrno, uint64_t *ret_val, uint64_t new_val, uint64_t wr_mask) @@ -4618,6 +4664,10 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { [CSR_MSTATUSH] = { "mstatush", any32, read_mstatush, write_mstatush }, + [CSR_MEDELEGH] = { "medelegh", any32, read_zero, write_ignore, + .min_priv_ver = PRIV_VERSION_1_13_0 }, + [CSR_HEDELEGH] = { "hedelegh", hmode32, read_hedelegh, write_hedelegh, + .min_priv_ver = PRIV_VERSION_1_13_0 }, /* Machine Trap Handling */ [CSR_MSCRATCH] = { "mscratch", any, read_mscratch, write_mscratch, diff --git a/target/riscv/debug.c b/target/riscv/debug.c index b110370..0b5099f 100644 --- a/target/riscv/debug.c +++ b/target/riscv/debug.c @@ -241,6 +241,76 @@ static void do_trigger_action(CPURISCVState *env, target_ulong trigger_index) } } +/* + * Check the privilege level of specific trigger matches CPU's current privilege + * level. + */ +static bool trigger_priv_match(CPURISCVState *env, trigger_type_t type, + int trigger_index) +{ + target_ulong ctrl = env->tdata1[trigger_index]; + + switch (type) { + case TRIGGER_TYPE_AD_MATCH: + /* type 2 trigger cannot be fired in VU/VS mode */ + if (env->virt_enabled) { + return false; + } + /* check U/S/M bit against current privilege level */ + if ((ctrl >> 3) & BIT(env->priv)) { + return true; + } + break; + case TRIGGER_TYPE_AD_MATCH6: + if (env->virt_enabled) { + /* check VU/VS bit against current privilege level */ + if ((ctrl >> 23) & BIT(env->priv)) { + return true; + } + } else { + /* check U/S/M bit against current privilege level */ + if ((ctrl >> 3) & BIT(env->priv)) { + return true; + } + } + break; + case TRIGGER_TYPE_INST_CNT: + if (env->virt_enabled) { + /* check VU/VS bit against current privilege level */ + if ((ctrl >> 25) & BIT(env->priv)) { + return true; + } + } else { + /* check U/S/M bit against current privilege level */ + if ((ctrl >> 6) & BIT(env->priv)) { + return true; + } + } + break; + case TRIGGER_TYPE_INT: + case TRIGGER_TYPE_EXCP: + case TRIGGER_TYPE_EXT_SRC: + qemu_log_mask(LOG_UNIMP, "trigger type: %d is not supported\n", type); + break; + case TRIGGER_TYPE_NO_EXIST: + case TRIGGER_TYPE_UNAVAIL: + qemu_log_mask(LOG_GUEST_ERROR, "trigger type: %d does not exist\n", + type); + break; + default: + g_assert_not_reached(); + } + + return false; +} + +/* Common matching conditions for all types of the triggers. */ +static bool trigger_common_match(CPURISCVState *env, trigger_type_t type, + int trigger_index) +{ + return trigger_priv_match(env, type, trigger_index); +} + /* type 2 trigger */ static uint32_t type2_breakpoint_size(CPURISCVState *env, target_ulong ctrl) @@ -554,7 +624,7 @@ void helper_itrigger_match(CPURISCVState *env) if (get_trigger_type(env, i) != TRIGGER_TYPE_INST_CNT) { continue; } - if (check_itrigger_priv(env, i)) { + if (!trigger_common_match(env, TRIGGER_TYPE_INST_CNT, i)) { continue; } count = itrigger_get_count(env, i); @@ -785,22 +855,18 @@ bool riscv_cpu_debug_check_breakpoint(CPUState *cs) for (i = 0; i < RV_MAX_TRIGGERS; i++) { trigger_type = get_trigger_type(env, i); + if (!trigger_common_match(env, trigger_type, i)) { + continue; + } + switch (trigger_type) { case TRIGGER_TYPE_AD_MATCH: - /* type 2 trigger cannot be fired in VU/VS mode */ - if (env->virt_enabled) { - return false; - } - ctrl = env->tdata1[i]; pc = env->tdata2[i]; if ((ctrl & TYPE2_EXEC) && (bp->pc == pc)) { - /* check U/S/M bit against current privilege level */ - if ((ctrl >> 3) & BIT(env->priv)) { - env->badaddr = pc; - return true; - } + env->badaddr = pc; + return true; } break; case TRIGGER_TYPE_AD_MATCH6: @@ -808,19 +874,8 @@ bool riscv_cpu_debug_check_breakpoint(CPUState *cs) pc = env->tdata2[i]; if ((ctrl & TYPE6_EXEC) && (bp->pc == pc)) { - if (env->virt_enabled) { - /* check VU/VS bit against current privilege level */ - if ((ctrl >> 23) & BIT(env->priv)) { - env->badaddr = pc; - return true; - } - } else { - /* check U/S/M bit against current privilege level */ - if ((ctrl >> 3) & BIT(env->priv)) { - env->badaddr = pc; - return true; - } - } + env->badaddr = pc; + return true; } break; default: @@ -846,13 +901,12 @@ bool riscv_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp) for (i = 0; i < RV_MAX_TRIGGERS; i++) { trigger_type = get_trigger_type(env, i); + if (!trigger_common_match(env, trigger_type, i)) { + continue; + } + switch (trigger_type) { case TRIGGER_TYPE_AD_MATCH: - /* type 2 trigger cannot be fired in VU/VS mode */ - if (env->virt_enabled) { - return false; - } - ctrl = env->tdata1[i]; addr = env->tdata2[i]; flags = 0; @@ -865,10 +919,7 @@ bool riscv_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp) } if ((wp->flags & flags) && (wp->vaddr == addr)) { - /* check U/S/M bit against current privilege level */ - if ((ctrl >> 3) & BIT(env->priv)) { - return true; - } + return true; } break; case TRIGGER_TYPE_AD_MATCH6: @@ -884,17 +935,7 @@ bool riscv_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp) } if ((wp->flags & flags) && (wp->vaddr == addr)) { - if (env->virt_enabled) { - /* check VU/VS bit against current privilege level */ - if ((ctrl >> 23) & BIT(env->priv)) { - return true; - } - } else { - /* check U/S/M bit against current privilege level */ - if ((ctrl >> 3) & BIT(env->priv)) { - return true; - } - } + return true; } break; default: diff --git a/target/riscv/fpu_helper.c b/target/riscv/fpu_helper.c index 871a70a..91b1a56 100644 --- a/target/riscv/fpu_helper.c +++ b/target/riscv/fpu_helper.c @@ -676,7 +676,7 @@ uint64_t helper_fround_h(CPURISCVState *env, uint64_t rs1) uint64_t helper_froundnx_h(CPURISCVState *env, uint64_t rs1) { - float16 frs1 = check_nanbox_s(env, rs1); + float16 frs1 = check_nanbox_h(env, rs1); frs1 = float16_round_to_int(frs1, &env->fp_status); return nanbox_h(env, frs1); } diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c index 235e2cd..1047961 100644 --- a/target/riscv/kvm/kvm-cpu.c +++ b/target/riscv/kvm/kvm-cpu.c @@ -1555,6 +1555,21 @@ static int kvm_riscv_handle_csr(CPUState *cs, struct kvm_run *run) return ret; } +static bool kvm_riscv_handle_debug(CPUState *cs) +{ + RISCVCPU *cpu = RISCV_CPU(cs); + CPURISCVState *env = &cpu->env; + + /* Ensure PC is synchronised */ + kvm_cpu_synchronize_state(cs); + + if (kvm_find_sw_breakpoint(cs, env->pc)) { + return true; + } + + return false; +} + int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) { int ret = 0; @@ -1565,6 +1580,11 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) case KVM_EXIT_RISCV_CSR: ret = kvm_riscv_handle_csr(cs, run); break; + case KVM_EXIT_DEBUG: + if (kvm_riscv_handle_debug(cs)) { + ret = EXCP_DEBUG; + } + break; default: qemu_log_mask(LOG_UNIMP, "%s: un-handled exit reason %d\n", __func__, run->exit_reason); @@ -1969,3 +1989,72 @@ static const TypeInfo riscv_kvm_cpu_type_infos[] = { }; DEFINE_TYPES(riscv_kvm_cpu_type_infos) + +static const uint32_t ebreak_insn = 0x00100073; +static const uint16_t c_ebreak_insn = 0x9002; + +int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) +{ + if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 2, 0)) { + return -EINVAL; + } + + if ((bp->saved_insn & 0x3) == 0x3) { + if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 0) + || cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&ebreak_insn, 4, 1)) { + return -EINVAL; + } + } else { + if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&c_ebreak_insn, 2, 1)) { + return -EINVAL; + } + } + + return 0; +} + +int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) +{ + uint32_t ebreak; + uint16_t c_ebreak; + + if ((bp->saved_insn & 0x3) == 0x3) { + if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&ebreak, 4, 0) || + ebreak != ebreak_insn || + cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 1)) { + return -EINVAL; + } + } else { + if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&c_ebreak, 2, 0) || + c_ebreak != c_ebreak_insn || + cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 2, 1)) { + return -EINVAL; + } + } + + return 0; +} + +int kvm_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type) +{ + /* TODO; To be implemented later. */ + return -EINVAL; +} + +int kvm_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type) +{ + /* TODO; To be implemented later. */ + return -EINVAL; +} + +void kvm_arch_remove_all_hw_breakpoints(void) +{ + /* TODO; To be implemented later. */ +} + +void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg) +{ + if (kvm_sw_breakpoints_active(cs)) { + dbg->control |= KVM_GUESTDBG_ENABLE; + } +} diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c index 683f604..ae25686 100644 --- a/target/riscv/tcg/tcg-cpu.c +++ b/target/riscv/tcg/tcg-cpu.c @@ -31,11 +31,17 @@ #include "hw/core/accel-cpu.h" #include "hw/core/tcg-cpu-ops.h" #include "tcg/tcg.h" +#ifndef CONFIG_USER_ONLY +#include "hw/boards.h" +#endif /* Hash that stores user set extensions */ static GHashTable *multi_ext_user_opts; static GHashTable *misa_ext_user_opts; +static GHashTable *multi_ext_implied_rules; +static GHashTable *misa_ext_implied_rules; + static bool cpu_cfg_ext_is_user_set(uint32_t ext_offset) { return g_hash_table_contains(multi_ext_user_opts, @@ -76,16 +82,11 @@ static void riscv_cpu_write_misa_bit(RISCVCPU *cpu, uint32_t bit, static const char *cpu_priv_ver_to_str(int priv_ver) { - switch (priv_ver) { - case PRIV_VERSION_1_10_0: - return "v1.10.0"; - case PRIV_VERSION_1_11_0: - return "v1.11.0"; - case PRIV_VERSION_1_12_0: - return "v1.12.0"; - } + const char *priv_spec_str = priv_spec_to_str(priv_ver); - g_assert_not_reached(); + g_assert(priv_spec_str); + + return priv_spec_str; } static void riscv_cpu_synchronize_from_tb(CPUState *cs, @@ -323,6 +324,10 @@ static void riscv_cpu_update_named_features(RISCVCPU *cpu) cpu->cfg.has_priv_1_12 = true; } + if (cpu->env.priv_ver >= PRIV_VERSION_1_13_0) { + cpu->cfg.has_priv_1_13 = true; + } + /* zic64b is 1.12 or later */ cpu->cfg.ext_zic64b = cpu->cfg.cbom_blocksize == 64 && cpu->cfg.cbop_blocksize == 64 && @@ -466,10 +471,6 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) return; } - if (cpu->cfg.ext_zfh) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zfhmin), true); - } - if (cpu->cfg.ext_zfhmin && !riscv_has_ext(env, RVF)) { error_setg(errp, "Zfh/Zfhmin extensions require F extension"); return; @@ -491,9 +492,6 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) error_propagate(errp, local_err); return; } - - /* The V vector extension depends on the Zve64d extension */ - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zve64d), true); } /* The Zve64d extension depends on the Zve64f extension */ @@ -502,18 +500,6 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) error_setg(errp, "Zve64d/V extensions require D extension"); return; } - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zve64f), true); - } - - /* The Zve64f extension depends on the Zve64x and Zve32f extensions */ - if (cpu->cfg.ext_zve64f) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zve64x), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zve32f), true); - } - - /* The Zve64x extension depends on the Zve32x extension */ - if (cpu->cfg.ext_zve64x) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zve32x), true); } /* The Zve32f extension depends on the Zve32x extension */ @@ -522,11 +508,6 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) error_setg(errp, "Zve32f/Zve64f extensions require F extension"); return; } - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zve32x), true); - } - - if (cpu->cfg.ext_zvfh) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvfhmin), true); } if (cpu->cfg.ext_zvfhmin && !cpu->cfg.ext_zve32f) { @@ -549,11 +530,6 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) return; } - /* Set the ISA extensions, checks should have happened above */ - if (cpu->cfg.ext_zhinx) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zca), true); - } - if ((cpu->cfg.ext_zdinx || cpu->cfg.ext_zhinxmin) && !cpu->cfg.ext_zfinx) { error_setg(errp, "Zdinx/Zhinx/Zhinxmin extensions require Zfinx"); return; @@ -571,27 +547,6 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) } } - if (cpu->cfg.ext_zce) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zca), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcb), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcmp), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcmt), true); - if (riscv_has_ext(env, RVF) && mcc->misa_mxl_max == MXL_RV32) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcf), true); - } - } - - /* zca, zcd and zcf has a PRIV 1.12.0 restriction */ - if (riscv_has_ext(env, RVC) && env->priv_ver >= PRIV_VERSION_1_12_0) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zca), true); - if (riscv_has_ext(env, RVF) && mcc->misa_mxl_max == MXL_RV32) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcf), true); - } - if (riscv_has_ext(env, RVD)) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcd), true); - } - } - if (mcc->misa_mxl_max != MXL_RV32 && cpu->cfg.ext_zcf) { error_setg(errp, "Zcf extension is only relevant to RV32"); return; @@ -625,48 +580,6 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) return; } - /* - * Shorthand vector crypto extensions - */ - if (cpu->cfg.ext_zvknc) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkn), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvbc), true); - } - - if (cpu->cfg.ext_zvkng) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkn), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkg), true); - } - - if (cpu->cfg.ext_zvkn) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkned), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvknhb), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkb), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkt), true); - } - - if (cpu->cfg.ext_zvksc) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvks), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvbc), true); - } - - if (cpu->cfg.ext_zvksg) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvks), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkg), true); - } - - if (cpu->cfg.ext_zvks) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvksed), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvksh), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkb), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkt), true); - } - - if (cpu->cfg.ext_zvkt) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvbb), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvbc), true); - } - if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkb || cpu->cfg.ext_zvkg || cpu->cfg.ext_zvkned || cpu->cfg.ext_zvknha || cpu->cfg.ext_zvksed || cpu->cfg.ext_zvksh) && !cpu->cfg.ext_zve32x) { @@ -682,29 +595,6 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) return; } - if (cpu->cfg.ext_zk) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zkn), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zkr), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zkt), true); - } - - if (cpu->cfg.ext_zkn) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zbkb), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zbkc), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zbkx), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zkne), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zknd), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zknh), true); - } - - if (cpu->cfg.ext_zks) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zbkb), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zbkc), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zbkx), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zksed), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zksh), true); - } - if (cpu->cfg.ext_zicntr && !cpu->cfg.ext_zicsr) { if (cpu_cfg_ext_is_user_set(CPU_CFG_OFFSET(ext_zicntr))) { error_setg(errp, "zicntr requires zicsr"); @@ -833,11 +723,151 @@ static void riscv_cpu_validate_profiles(RISCVCPU *cpu) } } +static void riscv_cpu_init_implied_exts_rules(void) +{ + RISCVCPUImpliedExtsRule *rule; +#ifndef CONFIG_USER_ONLY + MachineState *ms = MACHINE(qdev_get_machine()); +#endif + static bool initialized; + int i; + + /* Implied rules only need to be initialized once. */ + if (initialized) { + return; + } + + for (i = 0; (rule = riscv_misa_ext_implied_rules[i]); i++) { +#ifndef CONFIG_USER_ONLY + rule->enabled = bitmap_new(ms->smp.cpus); +#endif + g_hash_table_insert(misa_ext_implied_rules, + GUINT_TO_POINTER(rule->ext), (gpointer)rule); + } + + for (i = 0; (rule = riscv_multi_ext_implied_rules[i]); i++) { +#ifndef CONFIG_USER_ONLY + rule->enabled = bitmap_new(ms->smp.cpus); +#endif + g_hash_table_insert(multi_ext_implied_rules, + GUINT_TO_POINTER(rule->ext), (gpointer)rule); + } + + initialized = true; +} + +static void cpu_enable_implied_rule(RISCVCPU *cpu, + RISCVCPUImpliedExtsRule *rule) +{ + CPURISCVState *env = &cpu->env; + RISCVCPUImpliedExtsRule *ir; + bool enabled = false; + int i; + +#ifndef CONFIG_USER_ONLY + enabled = test_bit(cpu->env.mhartid, rule->enabled); +#endif + + if (!enabled) { + /* Enable the implied MISAs. */ + if (rule->implied_misa_exts) { + riscv_cpu_set_misa_ext(env, + env->misa_ext | rule->implied_misa_exts); + + for (i = 0; misa_bits[i] != 0; i++) { + if (rule->implied_misa_exts & misa_bits[i]) { + ir = g_hash_table_lookup(misa_ext_implied_rules, + GUINT_TO_POINTER(misa_bits[i])); + + if (ir) { + cpu_enable_implied_rule(cpu, ir); + } + } + } + } + + /* Enable the implied extensions. */ + for (i = 0; + rule->implied_multi_exts[i] != RISCV_IMPLIED_EXTS_RULE_END; i++) { + cpu_cfg_ext_auto_update(cpu, rule->implied_multi_exts[i], true); + + ir = g_hash_table_lookup(multi_ext_implied_rules, + GUINT_TO_POINTER( + rule->implied_multi_exts[i])); + + if (ir) { + cpu_enable_implied_rule(cpu, ir); + } + } + +#ifndef CONFIG_USER_ONLY + bitmap_set(rule->enabled, cpu->env.mhartid, 1); +#endif + } +} + +/* Zc extension has special implied rules that need to be handled separately. */ +static void cpu_enable_zc_implied_rules(RISCVCPU *cpu) +{ + RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(cpu); + CPURISCVState *env = &cpu->env; + + if (cpu->cfg.ext_zce) { + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zca), true); + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcb), true); + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcmp), true); + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcmt), true); + + if (riscv_has_ext(env, RVF) && mcc->misa_mxl_max == MXL_RV32) { + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcf), true); + } + } + + /* Zca, Zcd and Zcf has a PRIV 1.12.0 restriction */ + if (riscv_has_ext(env, RVC) && env->priv_ver >= PRIV_VERSION_1_12_0) { + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zca), true); + + if (riscv_has_ext(env, RVF) && mcc->misa_mxl_max == MXL_RV32) { + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcf), true); + } + + if (riscv_has_ext(env, RVD)) { + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcd), true); + } + } +} + +static void riscv_cpu_enable_implied_rules(RISCVCPU *cpu) +{ + RISCVCPUImpliedExtsRule *rule; + int i; + + /* Enable the implied extensions for Zc. */ + cpu_enable_zc_implied_rules(cpu); + + /* Enable the implied MISAs. */ + for (i = 0; (rule = riscv_misa_ext_implied_rules[i]); i++) { + if (riscv_has_ext(&cpu->env, rule->ext)) { + cpu_enable_implied_rule(cpu, rule); + } + } + + /* Enable the implied extensions. */ + for (i = 0; (rule = riscv_multi_ext_implied_rules[i]); i++) { + if (isa_ext_is_enabled(cpu, rule->ext)) { + cpu_enable_implied_rule(cpu, rule); + } + } +} + void riscv_tcg_cpu_finalize_features(RISCVCPU *cpu, Error **errp) { CPURISCVState *env = &cpu->env; Error *local_err = NULL; + riscv_cpu_init_implied_exts_rules(); + riscv_cpu_enable_implied_rules(cpu); + riscv_cpu_validate_misa_priv(env, &local_err); if (local_err != NULL) { error_propagate(errp, local_err); @@ -1343,6 +1373,15 @@ static void riscv_tcg_cpu_instance_init(CPUState *cs) misa_ext_user_opts = g_hash_table_new(NULL, g_direct_equal); multi_ext_user_opts = g_hash_table_new(NULL, g_direct_equal); + + if (!misa_ext_implied_rules) { + misa_ext_implied_rules = g_hash_table_new(NULL, g_direct_equal); + } + + if (!multi_ext_implied_rules) { + multi_ext_implied_rules = g_hash_table_new(NULL, g_direct_equal); + } + riscv_cpu_add_user_properties(obj); if (riscv_cpu_has_max_extensions(obj)) { diff --git a/target/s390x/Kconfig b/target/s390x/Kconfig index d886be4..8a95f2b 100644 --- a/target/s390x/Kconfig +++ b/target/s390x/Kconfig @@ -2,3 +2,8 @@ config S390X bool select PCI select S390_FLIC + +config S390X_LEGACY_CPUS + bool + default y + depends on S390X diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c index 7e8a1b4..029d91d 100644 --- a/target/s390x/arch_dump.c +++ b/target/s390x/arch_dump.c @@ -102,7 +102,7 @@ static void s390x_write_elf64_prstatus(Note *note, S390CPU *cpu, int id) regs->acrs[i] = cpu_to_be32(cpu->env.aregs[i]); regs->gprs[i] = cpu_to_be64(cpu->env.regs[i]); } - note->contents.prstatus.pid = id; + note->contents.prstatus.pid = cpu_to_be32(id); } static void s390x_write_elf64_fpregset(Note *note, S390CPU *cpu, int id) diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c index efb508c..a27f4b6 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c @@ -25,6 +25,7 @@ #ifndef CONFIG_USER_ONLY #include "sysemu/sysemu.h" #include "target/s390x/kvm/pv.h" +#include CONFIG_DEVICES #endif #define CPUDEF_INIT(_type, _gen, _ec_ga, _mha_pow, _hmfai, _name, _desc) \ @@ -47,6 +48,13 @@ * generation 15 one base feature and one optional feature have been deprecated. */ static S390CPUDef s390_cpu_defs[] = { + /* + * Linux requires at least z10 nowadays, and IBM only supports recent CPUs + * (see https://www.ibm.com/support/pages/ibm-mainframe-life-cycle-history), + * so we consider older CPUs as legacy that can optionally be disabled via + * the CONFIG_S390X_LEGACY_CPUS config switch. + */ +#if defined(CONFIG_S390X_LEGACY_CPUS) || defined(CONFIG_USER_ONLY) CPUDEF_INIT(0x2064, 7, 1, 38, 0x00000000U, "z900", "IBM zSeries 900 GA1"), CPUDEF_INIT(0x2064, 7, 2, 38, 0x00000000U, "z900.2", "IBM zSeries 900 GA2"), CPUDEF_INIT(0x2064, 7, 3, 38, 0x00000000U, "z900.3", "IBM zSeries 900 GA3"), @@ -64,6 +72,7 @@ static S390CPUDef s390_cpu_defs[] = { CPUDEF_INIT(0x2096, 9, 2, 40, 0x00000000U, "z9BC", "IBM System z9 BC GA1"), CPUDEF_INIT(0x2094, 9, 3, 40, 0x00000000U, "z9EC.3", "IBM System z9 EC GA3"), CPUDEF_INIT(0x2096, 9, 3, 40, 0x00000000U, "z9BC.2", "IBM System z9 BC GA2"), +#endif CPUDEF_INIT(0x2097, 10, 1, 43, 0x00000000U, "z10EC", "IBM System z10 EC GA1"), CPUDEF_INIT(0x2097, 10, 2, 43, 0x00000000U, "z10EC.2", "IBM System z10 EC GA2"), CPUDEF_INIT(0x2098, 10, 2, 43, 0x00000000U, "z10BC", "IBM System z10 BC GA1"), diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c index 1b494ec..94181d9 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c @@ -40,7 +40,7 @@ #include "sysemu/hw_accel.h" #include "sysemu/runstate.h" #include "sysemu/device_tree.h" -#include "exec/gdbstub.h" +#include "gdbstub/enums.h" #include "exec/ram_addr.h" #include "trace.h" #include "hw/s390x/s390-pci-inst.h" diff --git a/tests/qtest/fuzz/qos_fuzz.c b/tests/qtest/fuzz/qos_fuzz.c index b71e945..d3839bf 100644 --- a/tests/qtest/fuzz/qos_fuzz.c +++ b/tests/qtest/fuzz/qos_fuzz.c @@ -180,6 +180,7 @@ static void walk_path(QOSGraphNode *orig_path, int len) fuzz_path_vec = path_vec; } else { + g_string_free(cmd_line, true); g_free(path_vec); } diff --git a/util/qemu-timer.c b/util/qemu-timer.c index 6a0de33..213114b 100644 --- a/util/qemu-timer.c +++ b/util/qemu-timer.c @@ -645,6 +645,11 @@ int64_t qemu_clock_get_ns(QEMUClockType type) } } +static void qemu_virtual_clock_set_ns(int64_t time) +{ + return cpus_set_virtual_clock(time); +} + void init_clocks(QEMUTimerListNotifyCB *notify_cb) { QEMUClockType type; @@ -675,3 +680,24 @@ bool qemu_clock_run_all_timers(void) return progress; } + +int64_t qemu_clock_advance_virtual_time(int64_t dest) +{ + int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + AioContext *aio_context; + aio_context = qemu_get_aio_context(); + while (clock < dest) { + int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL, + QEMU_TIMER_ATTR_ALL); + int64_t warp = qemu_soonest_timeout(dest - clock, deadline); + + qemu_virtual_clock_set_ns(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + warp); + + qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL); + timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]); + clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + } + qemu_clock_notify(QEMU_CLOCK_VIRTUAL); + + return clock; +} |