aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS5
-rw-r--r--accel/hvf/hvf-accel-ops.c2
-rw-r--r--accel/kvm/kvm-all.c2
-rw-r--r--accel/qtest/qtest.c13
-rw-r--r--accel/tcg/ldst_common.c.inc8
-rw-r--r--accel/tcg/plugin-gen.c4
-rw-r--r--accel/tcg/tcg-accel-ops.c2
-rw-r--r--backends/host_iommu_device.c33
-rw-r--r--backends/iommufd.c76
-rw-r--r--backends/meson.build1
-rw-r--r--configs/targets/riscv64-softmmu.mak1
-rw-r--r--contrib/plugins/Makefile1
-rw-r--r--contrib/plugins/ips.c164
-rw-r--r--gdbstub/user.c1
-rw-r--r--hw/i386/intel_iommu.c203
-rw-r--r--hw/pci/pci.c79
-rw-r--r--hw/riscv/virt.c38
-rw-r--r--hw/s390x/ccw-device.c3
-rw-r--r--hw/s390x/ccw-device.h2
-rw-r--r--hw/s390x/s390-ccw.c29
-rw-r--r--hw/s390x/s390-virtio-ccw.c5
-rw-r--r--hw/vfio/ap.c2
-rw-r--r--hw/vfio/ccw.c18
-rw-r--r--hw/vfio/common.c150
-rw-r--r--hw/vfio/container-base.c70
-rw-r--r--hw/vfio/container.c164
-rw-r--r--hw/vfio/helpers.c17
-rw-r--r--hw/vfio/iommufd.c68
-rw-r--r--hw/vfio/pci.c23
-rw-r--r--hw/vfio/spapr.c3
-rw-r--r--hw/virtio/virtio-iommu.c296
-rw-r--r--include/exec/gdbstub.h11
-rw-r--r--include/exec/memory.h32
-rw-r--r--include/gdbstub/enums.h21
-rw-r--r--include/hw/i386/intel_iommu.h2
-rw-r--r--include/hw/pci/pci.h38
-rw-r--r--include/hw/riscv/virt.h1
-rw-r--r--include/hw/s390x/s390-ccw.h2
-rw-r--r--include/hw/vfio/vfio-common.h18
-rw-r--r--include/hw/vfio/vfio-container-base.h22
-rw-r--r--include/hw/virtio/virtio-iommu.h2
-rw-r--r--include/qemu/qemu-plugin.h27
-rw-r--r--include/qemu/range.h11
-rw-r--r--include/qemu/timer.h15
-rw-r--r--include/sysemu/accel-ops.h18
-rw-r--r--include/sysemu/cpu-timers.h3
-rw-r--r--include/sysemu/host_iommu_device.h102
-rw-r--r--include/sysemu/iommufd.h19
-rw-r--r--include/sysemu/qtest.h2
-rw-r--r--monitor/hmp-cmds.c3
-rw-r--r--plugins/api.c49
-rw-r--r--plugins/core.c4
-rw-r--r--plugins/qemu-plugins.symbols2
-rw-r--r--stubs/cpus-virtual-clock.c (renamed from stubs/cpus-get-virtual-clock.c)5
-rw-r--r--stubs/meson.build2
-rw-r--r--system/cpus.c11
-rw-r--r--system/memory.c13
-rw-r--r--system/qtest.c37
-rw-r--r--system/vl.c1
-rw-r--r--target/arm/hvf/hvf.c2
-rw-r--r--target/arm/hyp_gdbstub.c2
-rw-r--r--target/arm/kvm.c2
-rw-r--r--target/i386/kvm/kvm.c2
-rw-r--r--target/ppc/kvm.c2
-rw-r--r--target/riscv/cpu.c404
-rw-r--r--target/riscv/cpu.h28
-rw-r--r--target/riscv/cpu_bits.h8
-rw-r--r--target/riscv/cpu_cfg.h1
-rw-r--r--target/riscv/csr.c92
-rw-r--r--target/riscv/debug.c129
-rw-r--r--target/riscv/fpu_helper.c2
-rw-r--r--target/riscv/kvm/kvm-cpu.c89
-rw-r--r--target/riscv/tcg/tcg-cpu.c287
-rw-r--r--target/s390x/Kconfig5
-rw-r--r--target/s390x/arch_dump.c2
-rw-r--r--target/s390x/cpu_models.c9
-rw-r--r--target/s390x/kvm/kvm.c2
-rw-r--r--tests/qtest/fuzz/qos_fuzz.c1
-rw-r--r--util/qemu-timer.c26
79 files changed, 2371 insertions, 680 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index cef54de..19f67dc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2198,6 +2198,8 @@ M: Zhenzhong Duan <zhenzhong.duan@intel.com>
S: Supported
F: backends/iommufd.c
F: include/sysemu/iommufd.h
+F: backends/host_iommu_device.c
+F: include/sysemu/host_iommu_device.h
F: include/qemu/chardev_open.h
F: util/chardev_open.c
F: docs/devel/vfio-iommufd.rst
@@ -3312,6 +3314,7 @@ F: tests/qtest/
F: docs/devel/qgraph.rst
F: docs/devel/qtest.rst
X: tests/qtest/bios-tables-test*
+X: tests/qtest/migration-*
Device Fuzzing
M: Alexander Bulekov <alxndr@bu.edu>
@@ -3408,7 +3411,7 @@ F: include/qemu/userfaultfd.h
F: migration/
F: scripts/vmstate-static-checker.py
F: tests/vmstate-static-checker-data/
-F: tests/qtest/migration-test.c
+F: tests/qtest/migration-*
F: docs/devel/migration/
F: qapi/migration.json
F: tests/migration/
diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c
index b2a37a2..ac08cfb 100644
--- a/accel/hvf/hvf-accel-ops.c
+++ b/accel/hvf/hvf-accel-ops.c
@@ -52,7 +52,7 @@
#include "qemu/main-loop.h"
#include "exec/address-spaces.h"
#include "exec/exec-all.h"
-#include "exec/gdbstub.h"
+#include "gdbstub/enums.h"
#include "sysemu/cpus.h"
#include "sysemu/hvf.h"
#include "sysemu/hvf_int.h"
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 854cb86..2b4ab89 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -27,7 +27,7 @@
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
#include "hw/s390x/adapter.h"
-#include "exec/gdbstub.h"
+#include "gdbstub/enums.h"
#include "sysemu/kvm_int.h"
#include "sysemu/runstate.h"
#include "sysemu/cpus.h"
diff --git a/accel/qtest/qtest.c b/accel/qtest/qtest.c
index f6056ac..bf14032 100644
--- a/accel/qtest/qtest.c
+++ b/accel/qtest/qtest.c
@@ -24,6 +24,18 @@
#include "qemu/main-loop.h"
#include "hw/core/cpu.h"
+static int64_t qtest_clock_counter;
+
+/* Return the current value of qtest_clock_counter via qatomic_read_i64(). */
+static int64_t qtest_get_virtual_clock(void)
+{
+ return qatomic_read_i64(&qtest_clock_counter);
+}
+
+/* Store @count into qtest_clock_counter via qatomic_set_i64(). */
+static void qtest_set_virtual_clock(int64_t count)
+{
+ qatomic_set_i64(&qtest_clock_counter, count);
+}
+
static int qtest_init_accel(MachineState *ms)
{
return 0;
@@ -52,6 +64,7 @@ static void qtest_accel_ops_class_init(ObjectClass *oc, void *data)
ops->create_vcpu_thread = dummy_start_vcpu_thread;
ops->get_virtual_clock = qtest_get_virtual_clock;
+ ops->set_virtual_clock = qtest_set_virtual_clock;
};
static const TypeInfo qtest_accel_ops_type = {
diff --git a/accel/tcg/ldst_common.c.inc b/accel/tcg/ldst_common.c.inc
index c82048e..87ceb95 100644
--- a/accel/tcg/ldst_common.c.inc
+++ b/accel/tcg/ldst_common.c.inc
@@ -125,7 +125,9 @@ void helper_st_i128(CPUArchState *env, uint64_t addr, Int128 val, MemOpIdx oi)
static void plugin_load_cb(CPUArchState *env, abi_ptr addr, MemOpIdx oi)
{
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
+ if (cpu_plugin_mem_cbs_enabled(env_cpu(env))) {
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
+ }
}
uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra)
@@ -188,7 +190,9 @@ Int128 cpu_ld16_mmu(CPUArchState *env, abi_ptr addr,
static void plugin_store_cb(CPUArchState *env, abi_ptr addr, MemOpIdx oi)
{
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
+ if (cpu_plugin_mem_cbs_enabled(env_cpu(env))) {
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
+ }
}
void cpu_stb_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
index cc1634e..b6bae32 100644
--- a/accel/tcg/plugin-gen.c
+++ b/accel/tcg/plugin-gen.c
@@ -240,13 +240,13 @@ static void inject_mem_cb(struct qemu_plugin_dyn_cb *cb,
{
switch (cb->type) {
case PLUGIN_CB_MEM_REGULAR:
- if (rw && cb->regular.rw) {
+ if (rw & cb->regular.rw) {
gen_mem_cb(&cb->regular, meminfo, addr);
}
break;
case PLUGIN_CB_INLINE_ADD_U64:
case PLUGIN_CB_INLINE_STORE_U64:
- if (rw && cb->inline_insn.rw) {
+ if (rw & cb->inline_insn.rw) {
inject_cb(cb);
}
break;
diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c
index 1433e38..3c19e68 100644
--- a/accel/tcg/tcg-accel-ops.c
+++ b/accel/tcg/tcg-accel-ops.c
@@ -35,7 +35,7 @@
#include "exec/exec-all.h"
#include "exec/hwaddr.h"
#include "exec/tb-flush.h"
-#include "exec/gdbstub.h"
+#include "gdbstub/enums.h"
#include "hw/core/cpu.h"
diff --git a/backends/host_iommu_device.c b/backends/host_iommu_device.c
new file mode 100644
index 0000000..8f2dda1
--- /dev/null
+++ b/backends/host_iommu_device.c
@@ -0,0 +1,33 @@
+/*
+ * Host IOMMU device abstract
+ *
+ * Copyright (C) 2024 Intel Corporation.
+ *
+ * Authors: Zhenzhong Duan <zhenzhong.duan@intel.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "sysemu/host_iommu_device.h"
+
+OBJECT_DEFINE_ABSTRACT_TYPE(HostIOMMUDevice,
+ host_iommu_device,
+ HOST_IOMMU_DEVICE,
+ OBJECT)
+
+/* Class initializer for HostIOMMUDevice; currently has nothing to set up. */
+static void host_iommu_device_class_init(ObjectClass *oc, void *data)
+{
+}
+
+/* Instance initializer for HostIOMMUDevice; currently has nothing to set up. */
+static void host_iommu_device_init(Object *obj)
+{
+}
+
+/* Instance finalizer: release the device's name string with g_free(). */
+static void host_iommu_device_finalize(Object *obj)
+{
+    g_free(HOST_IOMMU_DEVICE(obj)->name);
+}
diff --git a/backends/iommufd.c b/backends/iommufd.c
index c506afb..84fefbc 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -208,23 +208,69 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
return ret;
}
-static const TypeInfo iommufd_backend_info = {
- .name = TYPE_IOMMUFD_BACKEND,
- .parent = TYPE_OBJECT,
- .instance_size = sizeof(IOMMUFDBackend),
- .instance_init = iommufd_backend_init,
- .instance_finalize = iommufd_backend_finalize,
- .class_size = sizeof(IOMMUFDBackendClass),
- .class_init = iommufd_backend_class_init,
- .interfaces = (InterfaceInfo[]) {
- { TYPE_USER_CREATABLE },
- { }
+/*
+ * Query hardware information for device @devid through the
+ * IOMMU_GET_HW_INFO ioctl on @be's file descriptor.
+ *
+ * @data and @len describe the caller's buffer handed to the ioctl.
+ * On success the reported data type is stored in *@type, which must
+ * not be NULL.
+ *
+ * Returns true on success; if the ioctl fails, sets @errp and returns
+ * false.
+ */
+bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
+                                     uint32_t *type, void *data, uint32_t len,
+                                     Error **errp)
+{
+    struct iommu_hw_info hw_info = {
+        .size = sizeof(hw_info),
+        .dev_id = devid,
+        .data_len = len,
+        .data_uptr = (uintptr_t)data,
+    };
+    int rc = ioctl(be->fd, IOMMU_GET_HW_INFO, &hw_info);
+
+    if (rc != 0) {
+        error_setg_errno(errp, errno, "Failed to get hardware info");
+        return false;
+    }
+
+    g_assert(type);
+    *type = hw_info.out_data_type;
+    return true;
+}
+
+/*
+ * Return the value of capability @cap recorded in @hiod->caps.
+ *
+ * Handles HOST_IOMMU_DEVICE_CAP_IOMMU_TYPE and
+ * HOST_IOMMU_DEVICE_CAP_AW_BITS; any other @cap sets @errp and
+ * returns -EINVAL.
+ */
+static int hiod_iommufd_get_cap(HostIOMMUDevice *hiod, int cap, Error **errp)
+{
+    const HostIOMMUDeviceCaps *hiod_caps = &hiod->caps;
+
+    if (cap == HOST_IOMMU_DEVICE_CAP_IOMMU_TYPE) {
+        return hiod_caps->type;
+    }
+    if (cap == HOST_IOMMU_DEVICE_CAP_AW_BITS) {
+        return hiod_caps->aw_bits;
+    }
+
+    error_setg(errp, "%s: unsupported capability %x", hiod->name, cap);
+    return -EINVAL;
+}
-type_init(register_types);
+/* Class initializer: install the iommufd implementation of get_cap(). */
+static void hiod_iommufd_class_init(ObjectClass *oc, void *data)
+{
+    HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc);
+
+    hioc->get_cap = hiod_iommufd_get_cap;
+}
+
+/*
+ * Type descriptions handed to DEFINE_TYPES():
+ *  - TYPE_IOMMUFD_BACKEND, which implements the TYPE_USER_CREATABLE
+ *    interface;
+ *  - TYPE_HOST_IOMMU_DEVICE_IOMMUFD, an abstract child of
+ *    TYPE_HOST_IOMMU_DEVICE whose class init installs get_cap().
+ */
+static const TypeInfo types[] = {
+ {
+ .name = TYPE_IOMMUFD_BACKEND,
+ .parent = TYPE_OBJECT,
+ .instance_size = sizeof(IOMMUFDBackend),
+ .instance_init = iommufd_backend_init,
+ .instance_finalize = iommufd_backend_finalize,
+ .class_size = sizeof(IOMMUFDBackendClass),
+ .class_init = iommufd_backend_class_init,
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_USER_CREATABLE },
+ { }
+ }
+ }, {
+ .name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD,
+ .parent = TYPE_HOST_IOMMU_DEVICE,
+ .class_init = hiod_iommufd_class_init,
+ .abstract = true,
+ }
+};
+
+DEFINE_TYPES(types)
diff --git a/backends/meson.build b/backends/meson.build
index 8b2b111..106312f 100644
--- a/backends/meson.build
+++ b/backends/meson.build
@@ -16,6 +16,7 @@ if host_os != 'windows'
endif
if host_os == 'linux'
system_ss.add(files('hostmem-memfd.c'))
+ system_ss.add(files('host_iommu_device.c'))
endif
if keyutils.found()
system_ss.add(keyutils, files('cryptodev-lkcf.c'))
diff --git a/configs/targets/riscv64-softmmu.mak b/configs/targets/riscv64-softmmu.mak
index f688ffa..917980e 100644
--- a/configs/targets/riscv64-softmmu.mak
+++ b/configs/targets/riscv64-softmmu.mak
@@ -1,6 +1,7 @@
TARGET_ARCH=riscv64
TARGET_BASE_ARCH=riscv
TARGET_SUPPORTS_MTTCG=y
+TARGET_KVM_HAVE_GUEST_DEBUG=y
TARGET_XML_FILES= gdb-xml/riscv-64bit-cpu.xml gdb-xml/riscv-32bit-fpu.xml gdb-xml/riscv-64bit-fpu.xml gdb-xml/riscv-64bit-virtual.xml
# needed by boot.c
TARGET_NEED_FDT=y
diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
index 0b64d2c..449ead1 100644
--- a/contrib/plugins/Makefile
+++ b/contrib/plugins/Makefile
@@ -27,6 +27,7 @@ endif
NAMES += hwprofile
NAMES += cache
NAMES += drcov
+NAMES += ips
ifeq ($(CONFIG_WIN32),y)
SO_SUFFIX := .dll
diff --git a/contrib/plugins/ips.c b/contrib/plugins/ips.c
new file mode 100644
index 0000000..29fa556
--- /dev/null
+++ b/contrib/plugins/ips.c
@@ -0,0 +1,164 @@
+/*
+ * Instructions Per Second (IPS) rate limiting plugin.
+ *
+ * This plugin can be used to restrict the execution of a system to a
+ * particular number of Instructions Per Second (IPS). This controls
+ * time as seen by the guest, so while wall-clock time may be longer,
+ * from the guest's point of view time will pass at the normal rate.
+ *
+ * This uses the new plugin API which allows the plugin to control
+ * system time.
+ *
+ * Copyright (c) 2023 Linaro Ltd
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <stdio.h>
+#include <glib.h>
+#include <qemu-plugin.h>
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
+
+/* how many times do we update time per sec */
+#define NUM_TIME_UPDATE_PER_SEC 10
+#define NSEC_IN_ONE_SEC (1000 * 1000 * 1000)
+
+static GMutex global_state_lock;
+
+static uint64_t max_insn_per_second = 1000 * 1000 * 1000; /* ips per core, per second */
+static uint64_t max_insn_per_quantum; /* trap every N instructions */
+static int64_t virtual_time_ns; /* last set virtual time */
+
+static const void *time_handle;
+
+typedef struct {
+ uint64_t total_insn;
+ uint64_t quantum_insn; /* insn in last quantum */
+ int64_t last_quantum_time; /* time when last quantum started */
+} vCPUTime;
+
+/* per-vCPU vCPUTime entries, looked up by cpu_index; file-local */
+static struct qemu_plugin_scoreboard *vcpus;
+
+/* Wall-clock time since the epoch, converted from microseconds to ns. */
+static int64_t now_ns(void)
+{
+    int64_t now_us = g_get_real_time();
+
+    return now_us * 1000;
+}
+
+/*
+ * Number of instructions one vCPU may execute in @elapsed_ns nanoseconds
+ * at the configured max_insn_per_second rate.
+ */
+static uint64_t num_insn_during(int64_t elapsed_ns)
+{
+    const double ns_per_sec = (double) NSEC_IN_ONE_SEC;
+    const double insn_per_sec = (double) max_insn_per_second;
+    double secs = elapsed_ns / ns_per_sec;
+
+    return secs * insn_per_sec;
+}
+
+/*
+ * Time in nanoseconds that @num_insn instructions take at the configured
+ * max_insn_per_second rate.
+ */
+static int64_t time_for_insn(uint64_t num_insn)
+{
+    const double insn_per_sec = (double) max_insn_per_second;
+    const double ns_per_sec = (double) NSEC_IN_ONE_SEC;
+    double secs = (double) num_insn / insn_per_sec;
+
+    return secs * ns_per_sec;
+}
+
+/*
+ * Close the current quantum for @vcpu.
+ *
+ * If the vCPU executed at least as many instructions as the configured
+ * rate allows for the wall-clock time elapsed since the quantum started,
+ * sleep for the time the surplus instructions correspond to. Then fold
+ * the quantum into the vCPU's total, restart the quantum, and advance the
+ * shared virtual time if this vCPU's total puts it ahead of the last
+ * value set. The comparison and update of virtual_time_ns happen under
+ * global_state_lock.
+ */
+static void update_system_time(vCPUTime *vcpu)
+{
+ int64_t elapsed_ns = now_ns() - vcpu->last_quantum_time;
+ uint64_t max_insn = num_insn_during(elapsed_ns);
+
+ if (vcpu->quantum_insn >= max_insn) {
+ /* this vcpu ran faster than expected, so it has to sleep */
+ uint64_t insn_advance = vcpu->quantum_insn - max_insn;
+ uint64_t time_advance_ns = time_for_insn(insn_advance);
+ int64_t sleep_us = time_advance_ns / 1000;
+ g_usleep(sleep_us);
+ }
+
+ vcpu->total_insn += vcpu->quantum_insn;
+ vcpu->quantum_insn = 0;
+ vcpu->last_quantum_time = now_ns();
+
+ /* based on total number of instructions, what should be the new time? */
+ int64_t new_virtual_time = time_for_insn(vcpu->total_insn);
+
+ g_mutex_lock(&global_state_lock);
+
+ /* Time only moves forward. Another vcpu might have updated it already. */
+ if (new_virtual_time > virtual_time_ns) {
+ qemu_plugin_update_ns(time_handle, new_virtual_time);
+ virtual_time_ns = new_virtual_time;
+ }
+
+ g_mutex_unlock(&global_state_lock);
+}
+
+/*
+ * vCPU init callback: zero the instruction counters of vCPU @cpu_index
+ * and start its first quantum at the current wall-clock time.
+ */
+static void vcpu_init(qemu_plugin_id_t id, unsigned int cpu_index)
+{
+    vCPUTime *slot = qemu_plugin_scoreboard_find(vcpus, cpu_index);
+
+    slot->last_quantum_time = now_ns();
+    slot->quantum_insn = 0;
+    slot->total_insn = 0;
+}
+
+/*
+ * vCPU exit callback: run a final update_system_time() for vCPU
+ * @cpu_index so its last quantum is folded into its total.
+ */
+static void vcpu_exit(qemu_plugin_id_t id, unsigned int cpu_index)
+{
+ vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
+ update_system_time(vcpu);
+}
+
+/*
+ * Conditional callback registered by vcpu_tb_trans(): asserts that vCPU
+ * @cpu_index has reached max_insn_per_quantum instructions in the current
+ * quantum, then calls update_system_time() for it. @udata is unused.
+ */
+static void every_quantum_insn(unsigned int cpu_index, void *udata)
+{
+ vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
+ g_assert(vcpu->quantum_insn >= max_insn_per_quantum);
+ update_system_time(vcpu);
+}
+
+/*
+ * Translation callback. For each translated block @tb it registers:
+ *  - an inline per-vCPU add of the block's instruction count to the
+ *    quantum_insn field of the vCPU's vCPUTime entry, and
+ *  - a conditional callback to every_quantum_insn(), using
+ *    QEMU_PLUGIN_COND_GE on quantum_insn against max_insn_per_quantum.
+ */
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+ size_t n_insns = qemu_plugin_tb_n_insns(tb);
+ qemu_plugin_u64 quantum_insn =
+ qemu_plugin_scoreboard_u64_in_struct(vcpus, vCPUTime, quantum_insn);
+ /* count (and eventually trap) once per tb */
+ qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu(
+ tb, QEMU_PLUGIN_INLINE_ADD_U64, quantum_insn, n_insns);
+ qemu_plugin_register_vcpu_tb_exec_cond_cb(
+ tb, every_quantum_insn,
+ QEMU_PLUGIN_CB_NO_REGS, QEMU_PLUGIN_COND_GE,
+ quantum_insn, max_insn_per_quantum, NULL);
+}
+
+/* atexit callback: free the vcpus scoreboard. @udata is unused. */
+static void plugin_exit(qemu_plugin_id_t id, void *udata)
+{
+ qemu_plugin_scoreboard_free(vcpus);
+}
+
+/*
+ * Plugin entry point.
+ *
+ * Recognised option:
+ *   ips=N   maximum number of instructions per second for each vCPU,
+ *           parsed as a decimal number; must be non-zero.
+ *
+ * Allocates the per-vCPU scoreboard, requests time control and registers
+ * the translation, vCPU init/exit and atexit callbacks.
+ *
+ * Returns 0 on success, -1 if an option is unknown or its value is
+ * missing, unparsable or zero.
+ */
+QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
+                                           const qemu_info_t *info, int argc,
+                                           char **argv)
+{
+    for (int i = 0; i < argc; i++) {
+        char *opt = argv[i];
+        g_auto(GStrv) tokens = g_strsplit(opt, "=", 2);
+        if (g_strcmp0(tokens[0], "ips") == 0) {
+            /* a bare "ips" without "=value" leaves tokens[1] NULL */
+            if (!tokens[1]) {
+                fprintf(stderr, "option parsing failed: %s\n", opt);
+                return -1;
+            }
+            /* clear stale errno so only this conversion's error is seen */
+            errno = 0;
+            max_insn_per_second = g_ascii_strtoull(tokens[1], NULL, 10);
+            if (errno) {
+                fprintf(stderr, "%s: couldn't parse %s (%s)\n",
+                        __func__, tokens[1], g_strerror(errno));
+                return -1;
+            }
+            /* the rate is used as a divisor in time_for_insn() */
+            if (!max_insn_per_second) {
+                fprintf(stderr, "%s: ips must be greater than zero: %s\n",
+                        __func__, tokens[1]);
+                return -1;
+            }
+        } else {
+            fprintf(stderr, "option parsing failed: %s\n", opt);
+            return -1;
+        }
+    }
+
+    vcpus = qemu_plugin_scoreboard_new(sizeof(vCPUTime));
+    max_insn_per_quantum = max_insn_per_second / NUM_TIME_UPDATE_PER_SEC;
+
+    time_handle = qemu_plugin_request_time_control();
+    g_assert(time_handle);
+
+    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
+    qemu_plugin_register_vcpu_init_cb(id, vcpu_init);
+    qemu_plugin_register_vcpu_exit_cb(id, vcpu_exit);
+    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
+
+    return 0;
+}
diff --git a/gdbstub/user.c b/gdbstub/user.c
index edeb72e..e34b58b 100644
--- a/gdbstub/user.c
+++ b/gdbstub/user.c
@@ -18,6 +18,7 @@
#include "exec/gdbstub.h"
#include "gdbstub/syscalls.h"
#include "gdbstub/user.h"
+#include "gdbstub/enums.h"
#include "hw/core/cpu.h"
#include "trace.h"
#include "internals.h"
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index c4350e0..37c21a0a 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -61,6 +61,12 @@ struct vtd_as_key {
uint32_t pasid;
};
+/*
+ * Key type read by vtd_hiod_equal(), the key comparison callback of the
+ * vtd_host_iommu_dev hash table.
+ * bus/devfn is PCI device's real BDF not the aliased one
+ */
+struct vtd_hiod_key {
+ PCIBus *bus;
+ uint8_t devfn;
+};
+
struct vtd_iotlb_key {
uint64_t gfn;
uint32_t pasid;
@@ -250,6 +256,25 @@ static guint vtd_as_hash(gconstpointer v)
return (guint)(value << 8 | key->devfn);
}
+/*
+ * Same implementation as vtd_as_hash()
+ *
+ * NOTE(review): @v is forwarded to vtd_as_hash() unchanged, so the key is
+ * hashed as whatever structure vtd_as_hash() expects; confirm it only
+ * reads members that struct vtd_hiod_key also has.
+ */
+static guint vtd_hiod_hash(gconstpointer v)
+{
+ return vtd_as_hash(v);
+}
+
+/* Key comparison: two keys match when both bus and devfn are equal. */
+static gboolean vtd_hiod_equal(gconstpointer v1, gconstpointer v2)
+{
+    const struct vtd_hiod_key *lhs = v1;
+    const struct vtd_hiod_key *rhs = v2;
+
+    return !(lhs->bus != rhs->bus || lhs->devfn != rhs->devfn);
+}
+
+/*
+ * Value destroy callback of the vtd_host_iommu_dev hash table: drops one
+ * reference on the stored object with object_unref().
+ */
+static void vtd_hiod_destroy(gpointer v)
+{
+ object_unref(v);
+}
+
static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
gpointer user_data)
{
@@ -3812,6 +3837,87 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
return vtd_dev_as;
}
+/*
+ * Check that host IOMMU device @hiod is compatible with vIOMMU @s.
+ *
+ * Fails when the device class has no get_cap() callback, when querying
+ * HOST_IOMMU_DEVICE_CAP_AW_BITS fails, or when the vIOMMU's aw_bits
+ * exceeds the value the host reports.
+ *
+ * Returns true if all checks pass, false with @errp set otherwise.
+ */
+static bool vtd_check_hiod(IntelIOMMUState *s, HostIOMMUDevice *hiod,
+                           Error **errp)
+{
+    HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod);
+    int host_aw_bits;
+
+    if (!hiodc->get_cap) {
+        error_setg(errp, ".get_cap() not implemented");
+        return false;
+    }
+
+    /* Common checks */
+    host_aw_bits = hiodc->get_cap(hiod, HOST_IOMMU_DEVICE_CAP_AW_BITS, errp);
+    if (host_aw_bits < 0) {
+        return false;
+    }
+
+    if (host_aw_bits < s->aw_bits) {
+        error_setg(errp, "aw-bits %d > host aw-bits %d",
+                   s->aw_bits, host_aw_bits);
+        return false;
+    }
+
+    return true;
+}
+
+/*
+ * set_iommu_device callback: record host IOMMU device @hiod for the PCI
+ * device at @bus/@devfn in s->vtd_host_iommu_dev.
+ *
+ * Fails if an entry for that bus/devfn already exists or if
+ * vtd_check_hiod() rejects @hiod. On success the table holds a new
+ * reference on @hiod. The lookup, check and insert run under the IOMMU
+ * lock.
+ *
+ * Keys are built as struct vtd_hiod_key, the type vtd_hiod_equal() reads,
+ * so every member of the stored key is initialised.
+ *
+ * Returns true on success, false with @errp set otherwise.
+ */
+static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
+                                     HostIOMMUDevice *hiod, Error **errp)
+{
+    IntelIOMMUState *s = opaque;
+    struct vtd_hiod_key key = {
+        .bus = bus,
+        .devfn = devfn,
+    };
+    struct vtd_hiod_key *new_key;
+
+    assert(hiod);
+
+    vtd_iommu_lock(s);
+
+    if (g_hash_table_lookup(s->vtd_host_iommu_dev, &key)) {
+        error_setg(errp, "Host IOMMU device already exist");
+        vtd_iommu_unlock(s);
+        return false;
+    }
+
+    if (!vtd_check_hiod(s, hiod, errp)) {
+        vtd_iommu_unlock(s);
+        return false;
+    }
+
+    new_key = g_malloc(sizeof(*new_key));
+    new_key->bus = bus;
+    new_key->devfn = devfn;
+
+    /* this reference is dropped by the table's value destroy callback */
+    object_ref(hiod);
+    g_hash_table_insert(s->vtd_host_iommu_dev, new_key, hiod);
+
+    vtd_iommu_unlock(s);
+
+    return true;
+}
+
+/*
+ * unset_iommu_device callback: drop the s->vtd_host_iommu_dev entry for
+ * the PCI device at @bus/@devfn, if there is one, under the IOMMU lock.
+ *
+ * The lookup key is a struct vtd_hiod_key, the type vtd_hiod_equal()
+ * reads.
+ */
+static void vtd_dev_unset_iommu_device(PCIBus *bus, void *opaque, int devfn)
+{
+    IntelIOMMUState *s = opaque;
+    struct vtd_hiod_key key = {
+        .bus = bus,
+        .devfn = devfn,
+    };
+
+    vtd_iommu_lock(s);
+
+    /* removing a key that is not in the table does nothing */
+    g_hash_table_remove(s->vtd_host_iommu_dev, &key);
+
+    vtd_iommu_unlock(s);
+}
+
/* Unmap the whole range in the notifier's scope. */
static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
{
@@ -3934,30 +4040,10 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n)
return;
}
-/* Do the initialization. It will also be called when reset, so pay
- * attention when adding new initialization stuff.
- */
-static void vtd_init(IntelIOMMUState *s)
+static void vtd_cap_init(IntelIOMMUState *s)
{
X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
- memset(s->csr, 0, DMAR_REG_SIZE);
- memset(s->wmask, 0, DMAR_REG_SIZE);
- memset(s->w1cmask, 0, DMAR_REG_SIZE);
- memset(s->womask, 0, DMAR_REG_SIZE);
-
- s->root = 0;
- s->root_scalable = false;
- s->dmar_enabled = false;
- s->intr_enabled = false;
- s->iq_head = 0;
- s->iq_tail = 0;
- s->iq = 0;
- s->iq_size = 0;
- s->qi_enabled = false;
- s->iq_last_desc_type = VTD_INV_DESC_NONE;
- s->iq_dw = false;
- s->next_frcd_reg = 0;
s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND |
VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS |
VTD_CAP_MGAW(s->aw_bits);
@@ -3974,27 +4060,6 @@ static void vtd_init(IntelIOMMUState *s)
}
s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
- /*
- * Rsvd field masks for spte
- */
- vtd_spte_rsvd[0] = ~0ULL;
- vtd_spte_rsvd[1] = VTD_SPTE_PAGE_L1_RSVD_MASK(s->aw_bits,
- x86_iommu->dt_supported);
- vtd_spte_rsvd[2] = VTD_SPTE_PAGE_L2_RSVD_MASK(s->aw_bits);
- vtd_spte_rsvd[3] = VTD_SPTE_PAGE_L3_RSVD_MASK(s->aw_bits);
- vtd_spte_rsvd[4] = VTD_SPTE_PAGE_L4_RSVD_MASK(s->aw_bits);
-
- vtd_spte_rsvd_large[2] = VTD_SPTE_LPAGE_L2_RSVD_MASK(s->aw_bits,
- x86_iommu->dt_supported);
- vtd_spte_rsvd_large[3] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits,
- x86_iommu->dt_supported);
-
- if (s->scalable_mode || s->snoop_control) {
- vtd_spte_rsvd[1] &= ~VTD_SPTE_SNP;
- vtd_spte_rsvd_large[2] &= ~VTD_SPTE_SNP;
- vtd_spte_rsvd_large[3] &= ~VTD_SPTE_SNP;
- }
-
if (x86_iommu_ir_supported(x86_iommu)) {
s->ecap |= VTD_ECAP_IR | VTD_ECAP_MHMV;
if (s->intr_eim == ON_OFF_AUTO_ON) {
@@ -4027,6 +4092,56 @@ static void vtd_init(IntelIOMMUState *s)
if (s->pasid) {
s->ecap |= VTD_ECAP_PASID;
}
+}
+
+/*
+ * Do the initialization. It will also be called when reset, so pay
+ * attention when adding new initialization stuff.
+ */
+static void vtd_init(IntelIOMMUState *s)
+{
+ X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
+
+ memset(s->csr, 0, DMAR_REG_SIZE);
+ memset(s->wmask, 0, DMAR_REG_SIZE);
+ memset(s->w1cmask, 0, DMAR_REG_SIZE);
+ memset(s->womask, 0, DMAR_REG_SIZE);
+
+ s->root = 0;
+ s->root_scalable = false;
+ s->dmar_enabled = false;
+ s->intr_enabled = false;
+ s->iq_head = 0;
+ s->iq_tail = 0;
+ s->iq = 0;
+ s->iq_size = 0;
+ s->qi_enabled = false;
+ s->iq_last_desc_type = VTD_INV_DESC_NONE;
+ s->iq_dw = false;
+ s->next_frcd_reg = 0;
+
+ vtd_cap_init(s);
+
+ /*
+ * Rsvd field masks for spte
+ */
+ vtd_spte_rsvd[0] = ~0ULL;
+ vtd_spte_rsvd[1] = VTD_SPTE_PAGE_L1_RSVD_MASK(s->aw_bits,
+ x86_iommu->dt_supported);
+ vtd_spte_rsvd[2] = VTD_SPTE_PAGE_L2_RSVD_MASK(s->aw_bits);
+ vtd_spte_rsvd[3] = VTD_SPTE_PAGE_L3_RSVD_MASK(s->aw_bits);
+ vtd_spte_rsvd[4] = VTD_SPTE_PAGE_L4_RSVD_MASK(s->aw_bits);
+
+ vtd_spte_rsvd_large[2] = VTD_SPTE_LPAGE_L2_RSVD_MASK(s->aw_bits,
+ x86_iommu->dt_supported);
+ vtd_spte_rsvd_large[3] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits,
+ x86_iommu->dt_supported);
+
+ if (s->scalable_mode || s->snoop_control) {
+ vtd_spte_rsvd[1] &= ~VTD_SPTE_SNP;
+ vtd_spte_rsvd_large[2] &= ~VTD_SPTE_SNP;
+ vtd_spte_rsvd_large[3] &= ~VTD_SPTE_SNP;
+ }
vtd_reset_caches(s);
@@ -4107,6 +4222,8 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
static PCIIOMMUOps vtd_iommu_ops = {
.get_address_space = vtd_host_dma_iommu,
+ .set_iommu_device = vtd_dev_set_iommu_device,
+ .unset_iommu_device = vtd_dev_unset_iommu_device,
};
static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
@@ -4226,6 +4343,8 @@ static void vtd_realize(DeviceState *dev, Error **errp)
g_free, g_free);
s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, vtd_as_equal,
g_free, g_free);
+ s->vtd_host_iommu_dev = g_hash_table_new_full(vtd_hiod_hash, vtd_hiod_equal,
+ g_free, vtd_hiod_destroy);
vtd_init(s);
pci_setup_iommu(bus, &vtd_iommu_ops, dev);
/* Pseudo address space under root PCI bus. */
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 324c130..50b86d5 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2648,11 +2648,27 @@ static void pci_device_class_base_init(ObjectClass *klass, void *data)
}
}
-AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
+/*
+ * Get IOMMU root bus, aliased bus and devfn of a PCI device
+ *
+ * IOMMU root bus is needed by all call sites to call into iommu_ops.
+ * For call sites which don't need aliased BDF, passing NULL to
+ * aliased_[bus|devfn] is allowed.
+ *
+ * @piommu_bus: return root #PCIBus backed by an IOMMU for the PCI device.
+ *
+ * @aliased_bus: return aliased #PCIBus of the PCI device, optional.
+ *
+ * @aliased_devfn: return aliased devfn of the PCI device, optional.
+ */
+static void pci_device_get_iommu_bus_devfn(PCIDevice *dev,
+ PCIBus **piommu_bus,
+ PCIBus **aliased_bus,
+ int *aliased_devfn)
{
PCIBus *bus = pci_get_bus(dev);
PCIBus *iommu_bus = bus;
- uint8_t devfn = dev->devfn;
+ int devfn = dev->devfn;
while (iommu_bus && !iommu_bus->iommu_ops && iommu_bus->parent_dev) {
PCIBus *parent_bus = pci_get_bus(iommu_bus->parent_dev);
@@ -2693,13 +2709,70 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
iommu_bus = parent_bus;
}
- if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) {
+
+ assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
+ assert(iommu_bus);
+
+ if (pci_bus_bypass_iommu(bus) || !iommu_bus->iommu_ops) {
+ iommu_bus = NULL;
+ }
+
+ *piommu_bus = iommu_bus;
+
+ if (aliased_bus) {
+ *aliased_bus = bus;
+ }
+
+ if (aliased_devfn) {
+ *aliased_devfn = devfn;
+ }
+}
+
+AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ pci_device_get_iommu_bus_devfn(dev, &iommu_bus, &bus, &devfn);
+ if (iommu_bus) {
return iommu_bus->iommu_ops->get_address_space(bus,
iommu_bus->iommu_opaque, devfn);
}
return &address_space_memory;
}
+/*
+ * Hand host IOMMU device @hiod to the IOMMU that backs @dev.
+ *
+ * If @dev has no IOMMU bus, or the IOMMU ops do not provide
+ * set_iommu_device, nothing is done and true is returned. Otherwise the
+ * aliased bus/devfn are recorded in @hiod and the callback is invoked
+ * with the device's own bus and devfn; its result is returned.
+ */
+bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod,
+                                 Error **errp)
+{
+    PCIBus *iommu_bus;
+    PCIBus *aliased_bus;
+    int aliased_devfn;
+
+    pci_device_get_iommu_bus_devfn(dev, &iommu_bus,
+                                   &aliased_bus, &aliased_devfn);
+    if (!iommu_bus || !iommu_bus->iommu_ops->set_iommu_device) {
+        return true;
+    }
+
+    hiod->aliased_bus = aliased_bus;
+    hiod->aliased_devfn = aliased_devfn;
+
+    /* set_iommu_device requires device's direct BDF instead of aliased BDF */
+    return iommu_bus->iommu_ops->set_iommu_device(pci_get_bus(dev),
+                                                  iommu_bus->iommu_opaque,
+                                                  dev->devfn, hiod, errp);
+}
+
+/*
+ * Invoke the unset_iommu_device callback of the IOMMU that backs @dev,
+ * passing the device's own bus and devfn.
+ *
+ * Does nothing when @dev has no IOMMU bus or the IOMMU ops do not provide
+ * unset_iommu_device.
+ */
+void pci_device_unset_iommu_device(PCIDevice *dev)
+{
+    PCIBus *iommu_bus;
+
+    pci_device_get_iommu_bus_devfn(dev, &iommu_bus, NULL, NULL);
+    if (iommu_bus && iommu_bus->iommu_ops->unset_iommu_device) {
+        /* a void function must not return an expression */
+        iommu_bus->iommu_ops->unset_iommu_device(pci_get_bus(dev),
+                                                 iommu_bus->iommu_opaque,
+                                                 dev->devfn);
+    }
+}
+
void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque)
{
/*
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 5676d66..bc0893e 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -515,6 +515,9 @@ static void create_fdt_one_imsic(RISCVVirtState *s, hwaddr base_addr,
uint32_t imsic_max_hart_per_socket, imsic_addr, imsic_size;
g_autofree uint32_t *imsic_cells = NULL;
g_autofree uint32_t *imsic_regs = NULL;
+ static const char * const imsic_compat[2] = {
+ "qemu,imsics", "riscv,imsics"
+ };
imsic_cells = g_new0(uint32_t, ms->smp.cpus * 2);
imsic_regs = g_new0(uint32_t, socket_count * 4);
@@ -538,13 +541,18 @@ static void create_fdt_one_imsic(RISCVVirtState *s, hwaddr base_addr,
}
}
- imsic_name = g_strdup_printf("/soc/imsics@%lx", (unsigned long)base_addr);
+ imsic_name = g_strdup_printf("/soc/interrupt-controller@%lx",
+ (unsigned long)base_addr);
qemu_fdt_add_subnode(ms->fdt, imsic_name);
- qemu_fdt_setprop_string(ms->fdt, imsic_name, "compatible", "riscv,imsics");
+ qemu_fdt_setprop_string_array(ms->fdt, imsic_name, "compatible",
+ (char **)&imsic_compat,
+ ARRAY_SIZE(imsic_compat));
+
qemu_fdt_setprop_cell(ms->fdt, imsic_name, "#interrupt-cells",
FDT_IMSIC_INT_CELLS);
qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller", NULL, 0);
qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller", NULL, 0);
+ qemu_fdt_setprop_cell(ms->fdt, imsic_name, "#msi-cells", 0);
qemu_fdt_setprop(ms->fdt, imsic_name, "interrupts-extended",
imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2);
qemu_fdt_setprop(ms->fdt, imsic_name, "reg", imsic_regs,
@@ -588,6 +596,12 @@ static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap,
}
+/*
+ * Build the FDT node path "/soc/interrupt-controller@<addr>" for the
+ * APLIC at @aplic_addr, with the address printed in hex via %lx.
+ * Caller must free string after use
+ */
+static char *fdt_get_aplic_nodename(unsigned long aplic_addr)
+{
+ return g_strdup_printf("/soc/interrupt-controller@%lx", aplic_addr);
+}
+
static void create_fdt_one_aplic(RISCVVirtState *s, int socket,
unsigned long aplic_addr, uint32_t aplic_size,
uint32_t msi_phandle,
@@ -597,18 +611,24 @@ static void create_fdt_one_aplic(RISCVVirtState *s, int socket,
bool m_mode, int num_harts)
{
int cpu;
- g_autofree char *aplic_name = NULL;
+ g_autofree char *aplic_name = fdt_get_aplic_nodename(aplic_addr);
g_autofree uint32_t *aplic_cells = g_new0(uint32_t, num_harts * 2);
MachineState *ms = MACHINE(s);
+ static const char * const aplic_compat[2] = {
+ "qemu,aplic", "riscv,aplic"
+ };
for (cpu = 0; cpu < num_harts; cpu++) {
aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
aplic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT : IRQ_S_EXT);
}
- aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr);
qemu_fdt_add_subnode(ms->fdt, aplic_name);
- qemu_fdt_setprop_string(ms->fdt, aplic_name, "compatible", "riscv,aplic");
+ qemu_fdt_setprop_string_array(ms->fdt, aplic_name, "compatible",
+ (char **)&aplic_compat,
+ ARRAY_SIZE(aplic_compat));
+ qemu_fdt_setprop_cell(ms->fdt, aplic_name, "#address-cells",
+ FDT_APLIC_ADDR_CELLS);
qemu_fdt_setprop_cell(ms->fdt, aplic_name,
"#interrupt-cells", FDT_APLIC_INT_CELLS);
qemu_fdt_setprop(ms->fdt, aplic_name, "interrupt-controller", NULL, 0);
@@ -628,7 +648,7 @@ static void create_fdt_one_aplic(RISCVVirtState *s, int socket,
if (aplic_child_phandle) {
qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,children",
aplic_child_phandle);
- qemu_fdt_setprop_cells(ms->fdt, aplic_name, "riscv,delegate",
+ qemu_fdt_setprop_cells(ms->fdt, aplic_name, "riscv,delegation",
aplic_child_phandle, 0x1,
VIRT_IRQCHIP_NUM_SOURCES);
}
@@ -646,7 +666,6 @@ static void create_fdt_socket_aplic(RISCVVirtState *s,
uint32_t *aplic_phandles,
int num_harts)
{
- g_autofree char *aplic_name = NULL;
unsigned long aplic_addr;
MachineState *ms = MACHINE(s);
uint32_t aplic_m_phandle, aplic_s_phandle;
@@ -672,9 +691,8 @@ static void create_fdt_socket_aplic(RISCVVirtState *s,
aplic_s_phandle, 0,
false, num_harts);
- aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr);
-
if (!socket) {
+ g_autofree char *aplic_name = fdt_get_aplic_nodename(aplic_addr);
platform_bus_add_all_fdt_nodes(ms->fdt, aplic_name,
memmap[VIRT_PLATFORM_BUS].base,
memmap[VIRT_PLATFORM_BUS].size,
@@ -1746,6 +1764,8 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
mc->init = virt_machine_init;
mc->max_cpus = VIRT_CPUS_MAX;
mc->default_cpu_type = TYPE_RISCV_CPU_BASE;
+ mc->block_default_type = IF_VIRTIO;
+ mc->no_cdrom = 1;
mc->pci_allow_0_address = true;
mc->possible_cpu_arch_ids = riscv_numa_possible_cpu_arch_ids;
mc->cpu_index_to_instance_props = riscv_numa_cpu_index_to_props;
diff --git a/hw/s390x/ccw-device.c b/hw/s390x/ccw-device.c
index fb8c1ac..a7d682e 100644
--- a/hw/s390x/ccw-device.c
+++ b/hw/s390x/ccw-device.c
@@ -31,9 +31,10 @@ static void ccw_device_refill_ids(CcwDevice *dev)
dev->subch_id.valid = true;
}
-static void ccw_device_realize(CcwDevice *dev, Error **errp)
+static bool ccw_device_realize(CcwDevice *dev, Error **errp)
{
ccw_device_refill_ids(dev);
+ return true;
}
static Property ccw_device_properties[] = {
diff --git a/hw/s390x/ccw-device.h b/hw/s390x/ccw-device.h
index 6dff952..5feeb0e 100644
--- a/hw/s390x/ccw-device.h
+++ b/hw/s390x/ccw-device.h
@@ -36,7 +36,7 @@ extern const VMStateDescription vmstate_ccw_dev;
struct CCWDeviceClass {
DeviceClass parent_class;
void (*unplug)(HotplugHandler *, DeviceState *, Error **);
- void (*realize)(CcwDevice *, Error **);
+ bool (*realize)(CcwDevice *, Error **);
void (*refill_ids)(CcwDevice *);
};
diff --git a/hw/s390x/s390-ccw.c b/hw/s390x/s390-ccw.c
index 5261e66..3c09750 100644
--- a/hw/s390x/s390-ccw.c
+++ b/hw/s390x/s390-ccw.c
@@ -71,7 +71,7 @@ IOInstEnding s390_ccw_store(SubchDev *sch)
return ret;
}
-static void s390_ccw_get_dev_info(S390CCWDevice *cdev,
+static bool s390_ccw_get_dev_info(S390CCWDevice *cdev,
char *sysfsdev,
Error **errp)
{
@@ -84,12 +84,12 @@ static void s390_ccw_get_dev_info(S390CCWDevice *cdev,
error_setg(errp, "No host device provided");
error_append_hint(errp,
"Use -device vfio-ccw,sysfsdev=PATH_TO_DEVICE\n");
- return;
+ return false;
}
if (!realpath(sysfsdev, dev_path)) {
error_setg_errno(errp, errno, "Host device '%s' not found", sysfsdev);
- return;
+ return false;
}
cdev->mdevid = g_path_get_basename(dev_path);
@@ -98,30 +98,29 @@ static void s390_ccw_get_dev_info(S390CCWDevice *cdev,
tmp = g_path_get_basename(tmp_dir);
if (sscanf(tmp, "%2x.%1x.%4x", &cssid, &ssid, &devid) != 3) {
error_setg_errno(errp, errno, "Failed to read %s", tmp);
- return;
+ return false;
}
cdev->hostid.cssid = cssid;
cdev->hostid.ssid = ssid;
cdev->hostid.devid = devid;
cdev->hostid.valid = true;
+ return true;
}
-static void s390_ccw_realize(S390CCWDevice *cdev, char *sysfsdev, Error **errp)
+static bool s390_ccw_realize(S390CCWDevice *cdev, char *sysfsdev, Error **errp)
{
CcwDevice *ccw_dev = CCW_DEVICE(cdev);
CCWDeviceClass *ck = CCW_DEVICE_GET_CLASS(ccw_dev);
DeviceState *parent = DEVICE(ccw_dev);
SubchDev *sch;
int ret;
- Error *err = NULL;
- s390_ccw_get_dev_info(cdev, sysfsdev, &err);
- if (err) {
- goto out_err_propagate;
+ if (!s390_ccw_get_dev_info(cdev, sysfsdev, errp)) {
+ return false;
}
- sch = css_create_sch(ccw_dev->devno, &err);
+ sch = css_create_sch(ccw_dev->devno, errp);
if (!sch) {
goto out_mdevid_free;
}
@@ -132,19 +131,18 @@ static void s390_ccw_realize(S390CCWDevice *cdev, char *sysfsdev, Error **errp)
ccw_dev->sch = sch;
ret = css_sch_build_schib(sch, &cdev->hostid);
if (ret) {
- error_setg_errno(&err, -ret, "%s: Failed to build initial schib",
+ error_setg_errno(errp, -ret, "%s: Failed to build initial schib",
__func__);
goto out_err;
}
- ck->realize(ccw_dev, &err);
- if (err) {
+ if (!ck->realize(ccw_dev, errp)) {
goto out_err;
}
css_generate_sch_crws(sch->cssid, sch->ssid, sch->schid,
parent->hotplugged, 1);
- return;
+ return true;
out_err:
css_subch_assign(sch->cssid, sch->ssid, sch->schid, sch->devno, NULL);
@@ -152,8 +150,7 @@ out_err:
g_free(sch);
out_mdevid_free:
g_free(cdev->mdevid);
-out_err_propagate:
- error_propagate(errp, err);
+ return false;
}
static void s390_ccw_unrealize(S390CCWDevice *cdev)
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 3d0bc3e..cd063f8 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -47,6 +47,7 @@
#include "migration/blocker.h"
#include "qapi/visitor.h"
#include "hw/s390x/cpu-topology.h"
+#include CONFIG_DEVICES
static Error *pv_mig_blocker;
@@ -1126,6 +1127,8 @@ static void ccw_machine_2_12_class_options(MachineClass *mc)
}
DEFINE_CCW_MACHINE(2_12, "2.12", false);
+#ifdef CONFIG_S390X_LEGACY_CPUS
+
static void ccw_machine_2_11_instance_options(MachineState *machine)
{
static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V2_11 };
@@ -1272,6 +1275,8 @@ static void ccw_machine_2_4_class_options(MachineClass *mc)
}
DEFINE_CCW_MACHINE(2_4, "2.4", false);
+#endif
+
static void ccw_machine_register_types(void)
{
type_register_static(&ccw_machine_info);
diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c
index c12531a..0c4354e 100644
--- a/hw/vfio/ap.c
+++ b/hw/vfio/ap.c
@@ -172,7 +172,7 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp)
* Report this error, but do not make it a failing condition.
* Lack of this IRQ in the host does not prevent normal operation.
*/
- error_report_err(err);
+ warn_report_err(err);
}
return;
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index 2600e62..1f8e127 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -582,14 +582,13 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp)
/* Call the class init function for subchannel. */
if (cdc->realize) {
- cdc->realize(cdev, vcdev->vdev.sysfsdev, &err);
- if (err) {
- goto out_err_propagate;
+ if (!cdc->realize(cdev, vcdev->vdev.sysfsdev, errp)) {
+ return;
}
}
if (!vfio_device_get_name(vbasedev, errp)) {
- return;
+ goto out_unrealize;
}
if (!vfio_attach_device(cdev->mdevid, vbasedev,
@@ -597,17 +596,17 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp)
goto out_attach_dev_err;
}
- if (!vfio_ccw_get_region(vcdev, &err)) {
+ if (!vfio_ccw_get_region(vcdev, errp)) {
goto out_region_err;
}
- if (!vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX, &err)) {
+ if (!vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX, errp)) {
goto out_io_notifier_err;
}
if (vcdev->crw_region) {
if (!vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_CRW_IRQ_INDEX,
- &err)) {
+ errp)) {
goto out_irq_notifier_err;
}
}
@@ -617,7 +616,7 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp)
* Report this error, but do not make it a failing condition.
* Lack of this IRQ in the host does not prevent normal operation.
*/
- error_report_err(err);
+ warn_report_err(err);
}
return;
@@ -632,11 +631,10 @@ out_region_err:
vfio_detach_device(vbasedev);
out_attach_dev_err:
g_free(vbasedev->name);
+out_unrealize:
if (cdc->unrealize) {
cdc->unrealize(cdev);
}
-out_err_propagate:
- error_propagate(errp, err);
}
static void vfio_ccw_unrealize(DeviceState *dev)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index f9619a1..7cdb969 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -630,16 +630,6 @@ static void vfio_listener_region_add(MemoryListener *listener,
goto fail;
}
- if (bcontainer->iova_ranges) {
- ret = memory_region_iommu_set_iova_ranges(giommu->iommu_mr,
- bcontainer->iova_ranges,
- &err);
- if (ret) {
- g_free(giommu);
- goto fail;
- }
- }
-
ret = memory_region_register_iommu_notifier(section->mr, &giommu->n,
&err);
if (ret) {
@@ -849,20 +839,11 @@ static bool vfio_section_is_vfio_pci(MemoryRegionSection *section,
return false;
}
-static void vfio_dirty_tracking_update(MemoryListener *listener,
- MemoryRegionSection *section)
+static void vfio_dirty_tracking_update_range(VFIODirtyRanges *range,
+ hwaddr iova, hwaddr end,
+ bool update_pci)
{
- VFIODirtyRangesListener *dirty = container_of(listener,
- VFIODirtyRangesListener,
- listener);
- VFIODirtyRanges *range = &dirty->ranges;
- hwaddr iova, end, *min, *max;
-
- if (!vfio_listener_valid_section(section, "tracking_update") ||
- !vfio_get_section_iova_range(dirty->bcontainer, section,
- &iova, &end, NULL)) {
- return;
- }
+ hwaddr *min, *max;
/*
* The address space passed to the dirty tracker is reduced to three ranges:
@@ -883,8 +864,7 @@ static void vfio_dirty_tracking_update(MemoryListener *listener,
* The alternative would be an IOVATree but that has a much bigger runtime
* overhead and unnecessary complexity.
*/
- if (vfio_section_is_vfio_pci(section, dirty->bcontainer) &&
- iova >= UINT32_MAX) {
+ if (update_pci && iova >= UINT32_MAX) {
min = &range->minpci64;
max = &range->maxpci64;
} else {
@@ -899,7 +879,23 @@ static void vfio_dirty_tracking_update(MemoryListener *listener,
}
trace_vfio_device_dirty_tracking_update(iova, end, *min, *max);
- return;
+}
+
+static void vfio_dirty_tracking_update(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ VFIODirtyRangesListener *dirty =
+ container_of(listener, VFIODirtyRangesListener, listener);
+ hwaddr iova, end;
+
+ if (!vfio_listener_valid_section(section, "tracking_update") ||
+ !vfio_get_section_iova_range(dirty->bcontainer, section,
+ &iova, &end, NULL)) {
+ return;
+ }
+
+ vfio_dirty_tracking_update_range(&dirty->ranges, iova, end,
+ vfio_section_is_vfio_pci(section, dirty->bcontainer));
}
static const MemoryListener vfio_dirty_tracking_listener = {
@@ -1030,7 +1026,7 @@ static void vfio_device_feature_dma_logging_start_destroy(
g_free(feature);
}
-static int vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer,
+static bool vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer,
Error **errp)
{
struct vfio_device_feature *feature;
@@ -1043,7 +1039,7 @@ static int vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer,
&ranges);
if (!feature) {
error_setg_errno(errp, errno, "Failed to prepare DMA logging");
- return -errno;
+ return false;
}
QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
@@ -1068,7 +1064,7 @@ out:
vfio_device_feature_dma_logging_start_destroy(feature);
- return ret;
+ return ret == 0;
}
static bool vfio_listener_log_global_start(MemoryListener *listener,
@@ -1077,18 +1073,18 @@ static bool vfio_listener_log_global_start(MemoryListener *listener,
ERRP_GUARD();
VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
listener);
- int ret;
+ bool ret;
if (vfio_devices_all_device_dirty_tracking(bcontainer)) {
ret = vfio_devices_dma_logging_start(bcontainer, errp);
} else {
- ret = vfio_container_set_dirty_page_tracking(bcontainer, true, errp);
+ ret = vfio_container_set_dirty_page_tracking(bcontainer, true, errp) == 0;
}
- if (ret) {
+ if (!ret) {
error_prepend(errp, "vfio: Could not start dirty page tracking - ");
}
- return !ret;
+ return ret;
}
static void vfio_listener_log_global_stop(MemoryListener *listener)
@@ -1306,37 +1302,50 @@ vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer,
&vrdl);
}
+static int vfio_sync_iommu_dirty_bitmap(VFIOContainerBase *bcontainer,
+ MemoryRegionSection *section)
+{
+ VFIOGuestIOMMU *giommu;
+ bool found = false;
+ Int128 llend;
+ vfio_giommu_dirty_notifier gdn;
+ int idx;
+
+ QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) {
+ if (MEMORY_REGION(giommu->iommu_mr) == section->mr &&
+ giommu->n.start == section->offset_within_region) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ return 0;
+ }
+
+ gdn.giommu = giommu;
+ idx = memory_region_iommu_attrs_to_index(giommu->iommu_mr,
+ MEMTXATTRS_UNSPECIFIED);
+
+ llend = int128_add(int128_make64(section->offset_within_region),
+ section->size);
+ llend = int128_sub(llend, int128_one());
+
+ iommu_notifier_init(&gdn.n, vfio_iommu_map_dirty_notify, IOMMU_NOTIFIER_MAP,
+ section->offset_within_region, int128_get64(llend),
+ idx);
+ memory_region_iommu_replay(giommu->iommu_mr, &gdn.n);
+
+ return 0;
+}
+
static int vfio_sync_dirty_bitmap(VFIOContainerBase *bcontainer,
MemoryRegionSection *section, Error **errp)
{
ram_addr_t ram_addr;
if (memory_region_is_iommu(section->mr)) {
- VFIOGuestIOMMU *giommu;
-
- QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) {
- if (MEMORY_REGION(giommu->iommu_mr) == section->mr &&
- giommu->n.start == section->offset_within_region) {
- Int128 llend;
- vfio_giommu_dirty_notifier gdn = { .giommu = giommu };
- int idx = memory_region_iommu_attrs_to_index(giommu->iommu_mr,
- MEMTXATTRS_UNSPECIFIED);
-
- llend = int128_add(int128_make64(section->offset_within_region),
- section->size);
- llend = int128_sub(llend, int128_one());
-
- iommu_notifier_init(&gdn.n,
- vfio_iommu_map_dirty_notify,
- IOMMU_NOTIFIER_MAP,
- section->offset_within_region,
- int128_get64(llend),
- idx);
- memory_region_iommu_replay(giommu->iommu_mr, &gdn.n);
- break;
- }
- }
- return 0;
+ return vfio_sync_iommu_dirty_bitmap(bcontainer, section);
} else if (memory_region_has_ram_discard_manager(section->mr)) {
int ret;
@@ -1499,6 +1508,13 @@ void vfio_put_address_space(VFIOAddressSpace *space)
}
}
+void vfio_address_space_insert(VFIOAddressSpace *space,
+ VFIOContainerBase *bcontainer)
+{
+ QLIST_INSERT_HEAD(&space->containers, bcontainer, next);
+ bcontainer->space = space;
+}
+
struct vfio_device_info *vfio_get_device_info(int fd)
{
struct vfio_device_info *info;
@@ -1528,6 +1544,7 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev,
{
const VFIOIOMMUClass *ops =
VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY));
+ HostIOMMUDevice *hiod;
if (vbasedev->iommufd) {
ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
@@ -1535,7 +1552,19 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev,
assert(ops);
- return ops->attach_device(name, vbasedev, as, errp);
+ if (!ops->attach_device(name, vbasedev, as, errp)) {
+ return false;
+ }
+
+ hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
+ if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp)) {
+ object_unref(hiod);
+ ops->detach_device(vbasedev);
+ return false;
+ }
+ vbasedev->hiod = hiod;
+
+ return true;
}
void vfio_detach_device(VFIODevice *vbasedev)
@@ -1543,5 +1572,6 @@ void vfio_detach_device(VFIODevice *vbasedev)
if (!vbasedev->bcontainer) {
return;
}
- vbasedev->bcontainer->ops->detach_device(vbasedev);
+ object_unref(vbasedev->hiod);
+ VFIO_IOMMU_GET_CLASS(vbasedev->bcontainer)->detach_device(vbasedev);
}
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
index 760d9d0..50b1664 100644
--- a/hw/vfio/container-base.c
+++ b/hw/vfio/container-base.c
@@ -19,73 +19,73 @@ int vfio_container_dma_map(VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
void *vaddr, bool readonly)
{
- g_assert(bcontainer->ops->dma_map);
- return bcontainer->ops->dma_map(bcontainer, iova, size, vaddr, readonly);
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+
+ g_assert(vioc->dma_map);
+ return vioc->dma_map(bcontainer, iova, size, vaddr, readonly);
}
int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
IOMMUTLBEntry *iotlb)
{
- g_assert(bcontainer->ops->dma_unmap);
- return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb);
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+
+ g_assert(vioc->dma_unmap);
+ return vioc->dma_unmap(bcontainer, iova, size, iotlb);
}
bool vfio_container_add_section_window(VFIOContainerBase *bcontainer,
MemoryRegionSection *section,
Error **errp)
{
- if (!bcontainer->ops->add_window) {
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+
+ if (!vioc->add_window) {
return true;
}
- return bcontainer->ops->add_window(bcontainer, section, errp);
+ return vioc->add_window(bcontainer, section, errp);
}
void vfio_container_del_section_window(VFIOContainerBase *bcontainer,
MemoryRegionSection *section)
{
- if (!bcontainer->ops->del_window) {
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+
+ if (!vioc->del_window) {
return;
}
- return bcontainer->ops->del_window(bcontainer, section);
+ return vioc->del_window(bcontainer, section);
}
int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer,
bool start, Error **errp)
{
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+
if (!bcontainer->dirty_pages_supported) {
return 0;
}
- g_assert(bcontainer->ops->set_dirty_page_tracking);
- return bcontainer->ops->set_dirty_page_tracking(bcontainer, start, errp);
+ g_assert(vioc->set_dirty_page_tracking);
+ return vioc->set_dirty_page_tracking(bcontainer, start, errp);
}
int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp)
{
- g_assert(bcontainer->ops->query_dirty_bitmap);
- return bcontainer->ops->query_dirty_bitmap(bcontainer, vbmap, iova, size,
- errp);
-}
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
-void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space,
- const VFIOIOMMUClass *ops)
-{
- bcontainer->ops = ops;
- bcontainer->space = space;
- bcontainer->error = NULL;
- bcontainer->dirty_pages_supported = false;
- bcontainer->dma_max_mappings = 0;
- bcontainer->iova_ranges = NULL;
- QLIST_INIT(&bcontainer->giommu_list);
- QLIST_INIT(&bcontainer->vrdl_list);
+ g_assert(vioc->query_dirty_bitmap);
+ return vioc->query_dirty_bitmap(bcontainer, vbmap, iova, size,
+ errp);
}
-void vfio_container_destroy(VFIOContainerBase *bcontainer)
+static void vfio_container_instance_finalize(Object *obj)
{
+ VFIOContainerBase *bcontainer = VFIO_IOMMU(obj);
VFIOGuestIOMMU *giommu, *tmp;
QLIST_REMOVE(bcontainer, next);
@@ -100,11 +100,27 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer)
g_list_free_full(bcontainer->iova_ranges, g_free);
}
+static void vfio_container_instance_init(Object *obj)
+{
+ VFIOContainerBase *bcontainer = VFIO_IOMMU(obj);
+
+ bcontainer->error = NULL;
+ bcontainer->dirty_pages_supported = false;
+ bcontainer->dma_max_mappings = 0;
+ bcontainer->iova_ranges = NULL;
+ QLIST_INIT(&bcontainer->giommu_list);
+ QLIST_INIT(&bcontainer->vrdl_list);
+}
+
static const TypeInfo types[] = {
{
.name = TYPE_VFIO_IOMMU,
- .parent = TYPE_INTERFACE,
+ .parent = TYPE_OBJECT,
+ .instance_init = vfio_container_instance_init,
+ .instance_finalize = vfio_container_instance_finalize,
+ .instance_size = sizeof(VFIOContainerBase),
.class_size = sizeof(VFIOIOMMUClass),
+ .abstract = true,
},
};
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 096cc97..2e7ecdf 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -354,7 +354,7 @@ static void vfio_kvm_device_del_group(VFIOGroup *group)
/*
* vfio_get_iommu_type - selects the richest iommu_type (v2 first)
*/
-static int vfio_get_iommu_type(VFIOContainer *container,
+static int vfio_get_iommu_type(int container_fd,
Error **errp)
{
int iommu_types[] = { VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU,
@@ -362,7 +362,7 @@ static int vfio_get_iommu_type(VFIOContainer *container,
int i;
for (i = 0; i < ARRAY_SIZE(iommu_types); i++) {
- if (ioctl(container->fd, VFIO_CHECK_EXTENSION, iommu_types[i])) {
+ if (ioctl(container_fd, VFIO_CHECK_EXTENSION, iommu_types[i])) {
return iommu_types[i];
}
}
@@ -373,67 +373,70 @@ static int vfio_get_iommu_type(VFIOContainer *container,
/*
* vfio_get_iommu_ops - get a VFIOIOMMUClass associated with a type
*/
-static const VFIOIOMMUClass *vfio_get_iommu_class(int iommu_type, Error **errp)
+static const char *vfio_get_iommu_class_name(int iommu_type)
{
- ObjectClass *klass = NULL;
-
switch (iommu_type) {
case VFIO_TYPE1v2_IOMMU:
case VFIO_TYPE1_IOMMU:
- klass = object_class_by_name(TYPE_VFIO_IOMMU_LEGACY);
+ return TYPE_VFIO_IOMMU_LEGACY;
break;
case VFIO_SPAPR_TCE_v2_IOMMU:
case VFIO_SPAPR_TCE_IOMMU:
- klass = object_class_by_name(TYPE_VFIO_IOMMU_SPAPR);
+ return TYPE_VFIO_IOMMU_SPAPR;
break;
default:
g_assert_not_reached();
};
-
- return VFIO_IOMMU_CLASS(klass);
}
-static bool vfio_set_iommu(VFIOContainer *container, int group_fd,
- VFIOAddressSpace *space, Error **errp)
+static bool vfio_set_iommu(int container_fd, int group_fd,
+ int *iommu_type, Error **errp)
{
- int iommu_type;
- const VFIOIOMMUClass *vioc;
-
- iommu_type = vfio_get_iommu_type(container, errp);
- if (iommu_type < 0) {
- return false;
- }
-
- if (ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
+ if (ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &container_fd)) {
error_setg_errno(errp, errno, "Failed to set group container");
return false;
}
- while (ioctl(container->fd, VFIO_SET_IOMMU, iommu_type)) {
- if (iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
+ while (ioctl(container_fd, VFIO_SET_IOMMU, *iommu_type)) {
+ if (*iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
/*
* On sPAPR, despite the IOMMU subdriver always advertises v1 and
* v2, the running platform may not support v2 and there is no
* way to guess it until an IOMMU group gets added to the container.
* So in case it fails with v2, try v1 as a fallback.
*/
- iommu_type = VFIO_SPAPR_TCE_IOMMU;
+ *iommu_type = VFIO_SPAPR_TCE_IOMMU;
continue;
}
error_setg_errno(errp, errno, "Failed to set iommu for container");
return false;
}
- container->iommu_type = iommu_type;
+ return true;
+}
- vioc = vfio_get_iommu_class(iommu_type, errp);
- if (!vioc) {
- error_setg(errp, "No available IOMMU models");
- return false;
+static VFIOContainer *vfio_create_container(int fd, VFIOGroup *group,
+ Error **errp)
+{
+ int iommu_type;
+ const char *vioc_name;
+ VFIOContainer *container;
+
+ iommu_type = vfio_get_iommu_type(fd, errp);
+ if (iommu_type < 0) {
+ return NULL;
}
- vfio_container_init(&container->bcontainer, space, vioc);
- return true;
+ if (!vfio_set_iommu(fd, group->fd, &iommu_type, errp)) {
+ return NULL;
+ }
+
+ vioc_name = vfio_get_iommu_class_name(iommu_type);
+
+ container = VFIO_IOMMU_LEGACY(object_new(vioc_name));
+ container->fd = fd;
+ container->iommu_type = iommu_type;
+ return container;
}
static int vfio_get_iommu_info(VFIOContainer *container,
@@ -542,6 +545,7 @@ static bool vfio_connect_container(VFIOGroup *group, AddressSpace *as,
VFIOContainerBase *bcontainer;
int ret, fd;
VFIOAddressSpace *space;
+ VFIOIOMMUClass *vioc;
space = vfio_get_address_space(as);
@@ -610,13 +614,11 @@ static bool vfio_connect_container(VFIOGroup *group, AddressSpace *as,
goto close_fd_exit;
}
- container = g_malloc0(sizeof(*container));
- container->fd = fd;
- bcontainer = &container->bcontainer;
-
- if (!vfio_set_iommu(container, group->fd, space, errp)) {
- goto free_container_exit;
+ container = vfio_create_container(fd, group, errp);
+ if (!container) {
+ goto close_fd_exit;
}
+ bcontainer = &container->bcontainer;
if (!vfio_cpr_register_container(bcontainer, errp)) {
goto free_container_exit;
@@ -628,16 +630,16 @@ static bool vfio_connect_container(VFIOGroup *group, AddressSpace *as,
goto unregister_container_exit;
}
- assert(bcontainer->ops->setup);
+ vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+ assert(vioc->setup);
- if (!bcontainer->ops->setup(bcontainer, errp)) {
+ if (!vioc->setup(bcontainer, errp)) {
goto enable_discards_exit;
}
vfio_kvm_device_add_group(group);
- QLIST_INIT(&container->group_list);
- QLIST_INSERT_HEAD(&space->containers, bcontainer, next);
+ vfio_address_space_insert(space, bcontainer);
group->container = container;
QLIST_INSERT_HEAD(&container->group_list, group, container_next);
@@ -659,8 +661,8 @@ listener_release_exit:
QLIST_REMOVE(bcontainer, next);
vfio_kvm_device_del_group(group);
memory_listener_unregister(&bcontainer->listener);
- if (bcontainer->ops->release) {
- bcontainer->ops->release(bcontainer);
+ if (vioc->release) {
+ vioc->release(bcontainer);
}
enable_discards_exit:
@@ -670,7 +672,7 @@ unregister_container_exit:
vfio_cpr_unregister_container(bcontainer);
free_container_exit:
- g_free(container);
+ object_unref(container);
close_fd_exit:
close(fd);
@@ -685,6 +687,7 @@ static void vfio_disconnect_container(VFIOGroup *group)
{
VFIOContainer *container = group->container;
VFIOContainerBase *bcontainer = &container->bcontainer;
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
QLIST_REMOVE(group, container_next);
group->container = NULL;
@@ -696,8 +699,8 @@ static void vfio_disconnect_container(VFIOGroup *group)
*/
if (QLIST_EMPTY(&container->group_list)) {
memory_listener_unregister(&bcontainer->listener);
- if (bcontainer->ops->release) {
- bcontainer->ops->release(bcontainer);
+ if (vioc->release) {
+ vioc->release(bcontainer);
}
}
@@ -709,12 +712,10 @@ static void vfio_disconnect_container(VFIOGroup *group)
if (QLIST_EMPTY(&container->group_list)) {
VFIOAddressSpace *space = bcontainer->space;
- vfio_container_destroy(bcontainer);
-
trace_vfio_disconnect_container(container->fd);
vfio_cpr_unregister_container(bcontainer);
close(container->fd);
- g_free(container);
+ object_unref(container);
vfio_put_address_space(space);
}
@@ -1126,6 +1127,8 @@ static void vfio_iommu_legacy_class_init(ObjectClass *klass, void *data)
{
VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);
+ vioc->hiod_typename = TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO;
+
vioc->setup = vfio_legacy_setup;
vioc->dma_map = vfio_legacy_dma_map;
vioc->dma_unmap = vfio_legacy_dma_unmap;
@@ -1136,12 +1139,75 @@ static void vfio_iommu_legacy_class_init(ObjectClass *klass, void *data)
vioc->pci_hot_reset = vfio_legacy_pci_hot_reset;
};
+static bool hiod_legacy_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
+ Error **errp)
+{
+ VFIODevice *vdev = opaque;
+
+ hiod->name = g_strdup(vdev->name);
+ hiod->caps.aw_bits = vfio_device_get_aw_bits(vdev);
+ hiod->agent = opaque;
+
+ return true;
+}
+
+static int hiod_legacy_vfio_get_cap(HostIOMMUDevice *hiod, int cap,
+ Error **errp)
+{
+ HostIOMMUDeviceCaps *caps = &hiod->caps;
+
+ switch (cap) {
+ case HOST_IOMMU_DEVICE_CAP_AW_BITS:
+ return caps->aw_bits;
+ default:
+ error_setg(errp, "%s: unsupported capability %x", hiod->name, cap);
+ return -EINVAL;
+ }
+}
+
+static GList *
+hiod_legacy_vfio_get_iova_ranges(HostIOMMUDevice *hiod, Error **errp)
+{
+ VFIODevice *vdev = hiod->agent;
+ GList *l = NULL;
+
+ g_assert(vdev);
+
+ if (vdev->bcontainer) {
+ l = g_list_copy(vdev->bcontainer->iova_ranges);
+ }
+
+ return l;
+}
+
+static void vfio_iommu_legacy_instance_init(Object *obj)
+{
+ VFIOContainer *container = VFIO_IOMMU_LEGACY(obj);
+
+ QLIST_INIT(&container->group_list);
+}
+
+static void hiod_legacy_vfio_class_init(ObjectClass *oc, void *data)
+{
+ HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc);
+
+ hioc->realize = hiod_legacy_vfio_realize;
+ hioc->get_cap = hiod_legacy_vfio_get_cap;
+ hioc->get_iova_ranges = hiod_legacy_vfio_get_iova_ranges;
+};
+
static const TypeInfo types[] = {
{
.name = TYPE_VFIO_IOMMU_LEGACY,
.parent = TYPE_VFIO_IOMMU,
+ .instance_init = vfio_iommu_legacy_instance_init,
+ .instance_size = sizeof(VFIOContainer),
.class_init = vfio_iommu_legacy_class_init,
- },
+ }, {
+ .name = TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO,
+ .parent = TYPE_HOST_IOMMU_DEVICE,
+ .class_init = hiod_legacy_vfio_class_init,
+ }
};
DEFINE_TYPES(types)
diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
index 27ea26a..b14edd4 100644
--- a/hw/vfio/helpers.c
+++ b/hw/vfio/helpers.c
@@ -658,3 +658,20 @@ void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
vbasedev->ram_block_discard_allowed = ram_discard;
}
+
+int vfio_device_get_aw_bits(VFIODevice *vdev)
+{
+ /*
+ * iova_ranges is a sorted list. For old kernels that support
+ * VFIO but not support query of iova ranges, iova_ranges is NULL,
+ * in this case HOST_IOMMU_DEVICE_CAP_AW_BITS_MAX(64) is returned.
+ */
+ GList *l = g_list_last(vdev->bcontainer->iova_ranges);
+
+ if (l) {
+ Range *range = l->data;
+ return range_get_last_bit(range) + 1;
+ }
+
+ return HOST_IOMMU_DEVICE_CAP_AW_BITS_MAX;
+}
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 554f9a6..c2f158e 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -237,9 +237,8 @@ static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container)
return;
}
memory_listener_unregister(&bcontainer->listener);
- vfio_container_destroy(bcontainer);
iommufd_backend_free_id(container->be, container->ioas_id);
- g_free(container);
+ object_unref(container);
}
static int iommufd_cdev_ram_block_discard_disable(bool state)
@@ -324,7 +323,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
/* try to attach to an existing container in this space */
QLIST_FOREACH(bcontainer, &space->containers, next) {
container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
- if (bcontainer->ops != iommufd_vioc ||
+ if (VFIO_IOMMU_GET_CLASS(bcontainer) != iommufd_vioc ||
vbasedev->iommufd != container->be) {
continue;
}
@@ -352,13 +351,12 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
trace_iommufd_cdev_alloc_ioas(vbasedev->iommufd->fd, ioas_id);
- container = g_malloc0(sizeof(*container));
+ container = VFIO_IOMMU_IOMMUFD(object_new(TYPE_VFIO_IOMMU_IOMMUFD));
container->be = vbasedev->iommufd;
container->ioas_id = ioas_id;
bcontainer = &container->bcontainer;
- vfio_container_init(bcontainer, space, iommufd_vioc);
- QLIST_INSERT_HEAD(&space->containers, bcontainer, next);
+ vfio_address_space_insert(space, bcontainer);
if (!iommufd_cdev_attach_container(vbasedev, container, errp)) {
goto err_attach_container;
@@ -465,7 +463,7 @@ static VFIODevice *iommufd_cdev_pci_find_by_devid(__u32 devid)
VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
QLIST_FOREACH(vbasedev_iter, &vfio_device_list, global_next) {
- if (vbasedev_iter->bcontainer->ops != iommufd_vioc) {
+ if (VFIO_IOMMU_GET_CLASS(vbasedev_iter->bcontainer) != iommufd_vioc) {
continue;
}
if (devid == vbasedev_iter->devid) {
@@ -612,6 +610,8 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data)
{
VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);
+ vioc->hiod_typename = TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO;
+
vioc->dma_map = iommufd_cdev_map;
vioc->dma_unmap = iommufd_cdev_unmap;
vioc->attach_device = iommufd_cdev_attach;
@@ -619,12 +619,64 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data)
vioc->pci_hot_reset = iommufd_cdev_pci_hot_reset;
};
+static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
+ Error **errp)
+{
+ VFIODevice *vdev = opaque;
+ HostIOMMUDeviceCaps *caps = &hiod->caps;
+ enum iommu_hw_info_type type;
+ union {
+ struct iommu_hw_info_vtd vtd;
+ } data;
+
+ hiod->agent = opaque;
+
+ if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid,
+ &type, &data, sizeof(data), errp)) {
+ return false;
+ }
+
+ hiod->name = g_strdup(vdev->name);
+ caps->type = type;
+ caps->aw_bits = vfio_device_get_aw_bits(vdev);
+
+ return true;
+}
+
+static GList *
+hiod_iommufd_vfio_get_iova_ranges(HostIOMMUDevice *hiod, Error **errp)
+{
+ VFIODevice *vdev = hiod->agent;
+ GList *l = NULL;
+
+ g_assert(vdev);
+
+ if (vdev->bcontainer) {
+ l = g_list_copy(vdev->bcontainer->iova_ranges);
+ }
+
+ return l;
+}
+
+static void hiod_iommufd_vfio_class_init(ObjectClass *oc, void *data)
+{
+ HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_CLASS(oc);
+
+ hiodc->realize = hiod_iommufd_vfio_realize;
+ hiodc->get_iova_ranges = hiod_iommufd_vfio_get_iova_ranges;
+};
+
static const TypeInfo types[] = {
{
.name = TYPE_VFIO_IOMMU_IOMMUFD,
.parent = TYPE_VFIO_IOMMU,
+ .instance_size = sizeof(VFIOIOMMUFDContainer),
.class_init = vfio_iommu_iommufd_class_init,
- },
+ }, {
+ .name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO,
+ .parent = TYPE_HOST_IOMMU_DEVICE_IOMMUFD,
+ .class_init = hiod_iommufd_vfio_class_init,
+ }
};
DEFINE_TYPES(types)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 74a79bd..e03d9f3 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2511,9 +2511,9 @@ int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev,
static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
{
VFIODevice *vbasedev = &vdev->vbasedev;
- const VFIOIOMMUClass *ops = vbasedev->bcontainer->ops;
+ const VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(vbasedev->bcontainer);
- return ops->pci_hot_reset(vbasedev, single);
+ return vioc->pci_hot_reset(vbasedev, single);
}
/*
@@ -3121,10 +3121,15 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
vfio_bars_register(vdev);
- if (!vfio_add_capabilities(vdev, errp)) {
+ if (!pci_device_set_iommu_device(pdev, vbasedev->hiod, errp)) {
+ error_prepend(errp, "Failed to set iommu_device: ");
goto out_teardown;
}
+ if (!vfio_add_capabilities(vdev, errp)) {
+ goto out_unset_idev;
+ }
+
if (vdev->vga) {
vfio_vga_quirk_setup(vdev);
}
@@ -3141,7 +3146,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
error_setg(errp,
"cannot support IGD OpRegion feature on hotplugged "
"device");
- goto out_teardown;
+ goto out_unset_idev;
}
ret = vfio_get_dev_region_info(vbasedev,
@@ -3150,11 +3155,11 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
if (ret) {
error_setg_errno(errp, -ret,
"does not support requested IGD OpRegion feature");
- goto out_teardown;
+ goto out_unset_idev;
}
if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) {
- goto out_teardown;
+ goto out_unset_idev;
}
}
@@ -3238,6 +3243,8 @@ out_deregister:
if (vdev->intx.mmap_timer) {
timer_free(vdev->intx.mmap_timer);
}
+out_unset_idev:
+ pci_device_unset_iommu_device(pdev);
out_teardown:
vfio_teardown_msi(vdev);
vfio_bars_exit(vdev);
@@ -3266,6 +3273,7 @@ static void vfio_instance_finalize(Object *obj)
static void vfio_exitfn(PCIDevice *pdev)
{
VFIOPCIDevice *vdev = VFIO_PCI(pdev);
+ VFIODevice *vbasedev = &vdev->vbasedev;
vfio_unregister_req_notifier(vdev);
vfio_unregister_err_notifier(vdev);
@@ -3280,7 +3288,8 @@ static void vfio_exitfn(PCIDevice *pdev)
vfio_teardown_msi(vdev);
vfio_pci_disable_rp_atomics(vdev);
vfio_bars_exit(vdev);
- vfio_migration_exit(&vdev->vbasedev);
+ vfio_migration_exit(vbasedev);
+ pci_device_unset_iommu_device(pdev);
}
static void vfio_pci_reset(DeviceState *dev)
diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
index 47b040f..018bd20 100644
--- a/hw/vfio/spapr.c
+++ b/hw/vfio/spapr.c
@@ -30,6 +30,8 @@ typedef struct VFIOSpaprContainer {
QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
} VFIOSpaprContainer;
+OBJECT_DECLARE_SIMPLE_TYPE(VFIOSpaprContainer, VFIO_IOMMU_SPAPR);
+
static bool vfio_prereg_listener_skipped_section(MemoryRegionSection *section)
{
if (memory_region_is_iommu(section->mr)) {
@@ -548,6 +550,7 @@ static const TypeInfo types[] = {
{
.name = TYPE_VFIO_IOMMU_SPAPR,
.parent = TYPE_VFIO_IOMMU_LEGACY,
+ .instance_size = sizeof(VFIOSpaprContainer),
.class_init = vfio_iommu_spapr_class_init,
},
};
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index 1326c6e..b9a7ddc 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -69,6 +69,11 @@ typedef struct VirtIOIOMMUMapping {
uint32_t flags;
} VirtIOIOMMUMapping;
+/*
+ * Lookup key for the VirtIOIOMMU host_iommu_devices hash table: a PCI bus
+ * pointer plus a device/function number (see hiod_hash() and hiod_equal()).
+ */
+struct hiod_key {
+ PCIBus *bus;
+ uint8_t devfn;
+};
+
static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev)
{
return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn);
@@ -462,8 +467,195 @@ static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque,
return &sdev->as;
}
+/* Key equality for struct hiod_key: both the bus and the devfn must match. */
+static gboolean hiod_equal(gconstpointer v1, gconstpointer v2)
+{
+    const struct hiod_key *lhs = v1;
+    const struct hiod_key *rhs = v2;
+    gboolean same_bus = lhs->bus == rhs->bus;
+    gboolean same_devfn = lhs->devfn == rhs->devfn;
+
+    return same_bus && same_devfn;
+}
+
+/* Hash for struct hiod_key: combines the bus pointer bits with the devfn. */
+static guint hiod_hash(gconstpointer v)
+{
+    const struct hiod_key *k = v;
+    guint bus_bits = (guint)(uintptr_t)k->bus;
+
+    /* devfn occupies the low 8 bits, the bus pointer bits sit above it */
+    return (guint)((bus_bits << 8) | k->devfn);
+}
+
+/*
+ * Value destructor for the host_iommu_devices hash table: releases the
+ * reference that was taken with object_ref() when the entry was inserted.
+ */
+static void hiod_destroy(gpointer v)
+{
+ object_unref(v);
+}
+
+/*
+ * Return the HostIOMMUDevice stored in @viommu->host_iommu_devices for
+ * (@bus, @devfn), or NULL when no entry exists for that key.
+ */
+static HostIOMMUDevice *
+get_host_iommu_device(VirtIOIOMMU *viommu, PCIBus *bus, int devfn)
+{
+    struct hiod_key lookup = {
+        .bus = bus,
+        .devfn = devfn,
+    };
+    HostIOMMUDevice *found;
+
+    found = g_hash_table_lookup(viommu->host_iommu_devices, &lookup);
+    return found;
+}
+
+/**
+ * rebuild_resv_regions: regenerate @sdev->resv_regions from scratch
+ *
+ * The host reserved ranges are inserted first; the reserved regions set
+ * through properties are then added by add_prop_resv_regions().
+ *
+ * Always returns 0.
+ */
+static int rebuild_resv_regions(IOMMUDevice *sdev)
+{
+    GList *iter;
+    int idx = 0;
+
+    /* drop the previously computed list before rebuilding it */
+    g_list_free_full(sdev->resv_regions, g_free);
+    sdev->resv_regions = NULL;
+
+    /* host reserved ranges come first and are all tagged as RESERVED */
+    iter = sdev->host_resv_ranges;
+    while (iter) {
+        ReservedRegion *region = g_new0(ReservedRegion, 1);
+        Range *host_range = (Range *)iter->data;
+
+        region->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED;
+        range_set_bounds(&region->range,
+                         range_lob(host_range), range_upb(host_range));
+        sdev->resv_regions = resv_region_list_insert(sdev->resv_regions,
+                                                     region);
+        trace_virtio_iommu_host_resv_regions(sdev->iommu_mr.parent_obj.name,
+                                             idx,
+                                             range_lob(&region->range),
+                                             range_upb(&region->range));
+        idx++;
+        iter = iter->next;
+    }
+
+    /*
+     * the reserved regions set by the machine through properties have
+     * higher priority and are added afterwards
+     */
+    add_prop_resv_regions(sdev);
+    return 0;
+}
+
+/**
+ * virtio_iommu_set_host_iova_ranges: Conveys the usable IOVA ranges
+ *
+ * The function turns those into reserved ranges. Once some
+ * reserved ranges have been set, new reserved regions cannot be
+ * added outside of the original ones.
+ *
+ * @s: virtio-iommu instance
+ * @bus: PCIBus whose IOMMUPciBus is looked up in @s->as_by_busptr
+ * @devfn: device and function number indexing the bus' device array
+ * @iova_ranges: list of usable IOVA ranges (not modified, still owned by
+ *               the caller)
+ * @errp: error handle
+ *
+ * Returns 0 on success, or -EINVAL with @errp set when no IOMMU device is
+ * known for (@bus, @devfn) or when the new ranges conflict with the ones
+ * already recorded.
+ */
+static int virtio_iommu_set_host_iova_ranges(VirtIOIOMMU *s, PCIBus *bus,
+                                             int devfn, GList *iova_ranges,
+                                             Error **errp)
+{
+    IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);
+    IOMMUDevice *sdev;
+    GList *current_ranges;
+    GList *l, *tmp, *new_ranges = NULL;
+    int ret = -EINVAL;
+
+    /* fail cleanly instead of dereferencing a missing bus entry */
+    if (!sbus) {
+        error_setg(errp, "%s: no IOMMUPciBus found!", __func__);
+        return ret;
+    }
+
+    sdev = sbus->pbdev[devfn];
+    if (!sdev) {
+        error_setg(errp, "%s: no IOMMUDevice found!", __func__);
+        return ret;
+    }
+
+    current_ranges = sdev->host_resv_ranges;
+
+    g_assert(!sdev->probe_done);
+
+    /* check that each new resv region is included in an existing one */
+    if (sdev->host_resv_ranges) {
+        range_inverse_array(iova_ranges,
+                            &new_ranges,
+                            0, UINT64_MAX);
+
+        for (tmp = new_ranges; tmp; tmp = tmp->next) {
+            Range *newr = (Range *)tmp->data;
+            bool included = false;
+
+            for (l = current_ranges; l; l = l->next) {
+                Range *r = (Range *)l->data;
+
+                if (range_contains_range(r, newr)) {
+                    included = true;
+                    break;
+                }
+            }
+            if (!included) {
+                goto error;
+            }
+        }
+        /* all new reserved ranges are included in existing ones */
+        ret = 0;
+        goto out;
+    }
+
+    /* first set of ranges for this device: record them and rebuild */
+    range_inverse_array(iova_ranges,
+                        &sdev->host_resv_ranges,
+                        0, UINT64_MAX);
+    rebuild_resv_regions(sdev);
+
+    return 0;
+error:
+    error_setg(errp, "%s Conflicting host reserved ranges set!",
+               __func__);
+out:
+    g_list_free_full(new_ranges, g_free);
+    return ret;
+}
+
+/*
+ * PCIIOMMUOps.set_iommu_device callback: register @hiod for (@bus, @devfn).
+ *
+ * When the host IOMMU device class provides get_iova_ranges(), the usable
+ * host IOVA ranges are fetched and applied to the IOMMU device identified
+ * by @hiod->aliased_bus / @hiod->aliased_devfn before @hiod is stored. The
+ * table takes its own reference on @hiod.
+ *
+ * Returns true on success (including the case where no IOVA ranges are
+ * available), false with @errp set otherwise.
+ */
+static bool virtio_iommu_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
+                                          HostIOMMUDevice *hiod, Error **errp)
+{
+    VirtIOIOMMU *viommu = opaque;
+    HostIOMMUDeviceClass *hiodc;
+    struct hiod_key *new_key;
+    GList *host_iova_ranges = NULL;
+
+    /* validate @hiod before its class is looked up */
+    assert(hiod);
+    hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod);
+
+    if (get_host_iommu_device(viommu, bus, devfn)) {
+        error_setg(errp, "Host IOMMU device already exists");
+        return false;
+    }
+
+    if (hiodc->get_iova_ranges) {
+        Error *local_err = NULL;
+        int ret;
+
+        host_iova_ranges = hiodc->get_iova_ranges(hiod, &local_err);
+        if (!host_iova_ranges) {
+            /*
+             * some old kernels may not support that capability; this is
+             * not fatal, so never hand an Error back together with 'true'
+             */
+            if (local_err) {
+                warn_report_err(local_err);
+            }
+            return true;
+        }
+        ret = virtio_iommu_set_host_iova_ranges(viommu, hiod->aliased_bus,
+                                                hiod->aliased_devfn,
+                                                host_iova_ranges, errp);
+        if (ret) {
+            g_list_free_full(host_iova_ranges, g_free);
+            return false;
+        }
+    }
+
+    /* the key is owned by the hash table, which releases it with g_free() */
+    new_key = g_malloc(sizeof(*new_key));
+    new_key->bus = bus;
+    new_key->devfn = devfn;
+
+    object_ref(hiod);
+    g_hash_table_insert(viommu->host_iommu_devices, new_key, hiod);
+    g_list_free_full(host_iova_ranges, g_free);
+
+    return true;
+}
+
+/*
+ * PCIIOMMUOps.unset_iommu_device callback: remove the entry registered for
+ * (@bus, @devfn), if there is one.
+ */
+static void
+virtio_iommu_unset_iommu_device(PCIBus *bus, void *opaque, int devfn)
+{
+    VirtIOIOMMU *viommu = opaque;
+    struct hiod_key lookup = {
+        .bus = bus,
+        .devfn = devfn,
+    };
+
+    if (g_hash_table_lookup(viommu->host_iommu_devices, &lookup)) {
+        g_hash_table_remove(viommu->host_iommu_devices, &lookup);
+    }
+}
+
static const PCIIOMMUOps virtio_iommu_ops = {
.get_address_space = virtio_iommu_find_add_as,
+ .set_iommu_device = virtio_iommu_set_iommu_device,
+ .unset_iommu_device = virtio_iommu_unset_iommu_device,
};
static int virtio_iommu_attach(VirtIOIOMMU *s,
@@ -1159,106 +1351,6 @@ static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr,
return 0;
}
-/**
- * rebuild_resv_regions: rebuild resv regions with both the
- * info of host resv ranges and property set resv ranges
- */
-static int rebuild_resv_regions(IOMMUDevice *sdev)
-{
- GList *l;
- int i = 0;
-
- /* free the existing list and rebuild it from scratch */
- g_list_free_full(sdev->resv_regions, g_free);
- sdev->resv_regions = NULL;
-
- /* First add host reserved regions if any, all tagged as RESERVED */
- for (l = sdev->host_resv_ranges; l; l = l->next) {
- ReservedRegion *reg = g_new0(ReservedRegion, 1);
- Range *r = (Range *)l->data;
-
- reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED;
- range_set_bounds(&reg->range, range_lob(r), range_upb(r));
- sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg);
- trace_virtio_iommu_host_resv_regions(sdev->iommu_mr.parent_obj.name, i,
- range_lob(&reg->range),
- range_upb(&reg->range));
- i++;
- }
- /*
- * then add higher priority reserved regions set by the machine
- * through properties
- */
- add_prop_resv_regions(sdev);
- return 0;
-}
-
-/**
- * virtio_iommu_set_iova_ranges: Conveys the usable IOVA ranges
- *
- * The function turns those into reserved ranges. Once some
- * reserved ranges have been set, new reserved regions cannot be
- * added outside of the original ones.
- *
- * @mr: IOMMU MR
- * @iova_ranges: list of usable IOVA ranges
- * @errp: error handle
- */
-static int virtio_iommu_set_iova_ranges(IOMMUMemoryRegion *mr,
- GList *iova_ranges,
- Error **errp)
-{
- IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
- GList *current_ranges = sdev->host_resv_ranges;
- GList *l, *tmp, *new_ranges = NULL;
- int ret = -EINVAL;
-
- /* check that each new resv region is included in an existing one */
- if (sdev->host_resv_ranges) {
- range_inverse_array(iova_ranges,
- &new_ranges,
- 0, UINT64_MAX);
-
- for (tmp = new_ranges; tmp; tmp = tmp->next) {
- Range *newr = (Range *)tmp->data;
- bool included = false;
-
- for (l = current_ranges; l; l = l->next) {
- Range * r = (Range *)l->data;
-
- if (range_contains_range(r, newr)) {
- included = true;
- break;
- }
- }
- if (!included) {
- goto error;
- }
- }
- /* all new reserved ranges are included in existing ones */
- ret = 0;
- goto out;
- }
-
- if (sdev->probe_done) {
- warn_report("%s: Notified about new host reserved regions after probe",
- mr->parent_obj.name);
- }
-
- range_inverse_array(iova_ranges,
- &sdev->host_resv_ranges,
- 0, UINT64_MAX);
- rebuild_resv_regions(sdev);
-
- return 0;
-error:
- error_setg(errp, "IOMMU mr=%s Conflicting host reserved ranges set!",
- mr->parent_obj.name);
-out:
- g_list_free_full(new_ranges, g_free);
- return ret;
-}
-
static void virtio_iommu_system_reset(void *opaque)
{
VirtIOIOMMU *s = opaque;
@@ -1357,6 +1449,9 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free);
+ s->host_iommu_devices = g_hash_table_new_full(hiod_hash, hiod_equal,
+ g_free, hiod_destroy);
+
if (s->primary_bus) {
pci_setup_iommu(s->primary_bus, &virtio_iommu_ops, s);
} else {
@@ -1581,7 +1676,6 @@ static void virtio_iommu_memory_region_class_init(ObjectClass *klass,
imrc->replay = virtio_iommu_replay;
imrc->notify_flag_changed = virtio_iommu_notify_flag_changed;
imrc->iommu_set_page_size_mask = virtio_iommu_set_page_size_mask;
- imrc->iommu_set_iova_ranges = virtio_iommu_set_iova_ranges;
}
static const TypeInfo virtio_iommu_info = {
diff --git a/include/exec/gdbstub.h b/include/exec/gdbstub.h
index eb14b91..1bd2c4e 100644
--- a/include/exec/gdbstub.h
+++ b/include/exec/gdbstub.h
@@ -1,15 +1,6 @@
#ifndef GDBSTUB_H
#define GDBSTUB_H
-#define DEFAULT_GDBSTUB_PORT "1234"
-
-/* GDB breakpoint/watchpoint types */
-#define GDB_BREAKPOINT_SW 0
-#define GDB_BREAKPOINT_HW 1
-#define GDB_WATCHPOINT_WRITE 2
-#define GDB_WATCHPOINT_READ 3
-#define GDB_WATCHPOINT_ACCESS 4
-
typedef struct GDBFeature {
const char *xmlname;
const char *xml;
@@ -144,4 +135,4 @@ void gdb_set_stop_cpu(CPUState *cpu);
/* in gdbstub-xml.c, generated by scripts/feature_to_c.py */
extern const GDBFeature gdb_static_features[];
-#endif
+#endif /* GDBSTUB_H */
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 2d7c278..0903513 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -530,26 +530,6 @@ struct IOMMUMemoryRegionClass {
int (*iommu_set_page_size_mask)(IOMMUMemoryRegion *iommu,
uint64_t page_size_mask,
Error **errp);
- /**
- * @iommu_set_iova_ranges:
- *
- * Propagate information about the usable IOVA ranges for a given IOMMU
- * memory region. Used for example to propagate host physical device
- * reserved memory region constraints to the virtual IOMMU.
- *
- * Optional method: if this method is not provided, then the default IOVA
- * aperture is used.
- *
- * @iommu: the IOMMUMemoryRegion
- *
- * @iova_ranges: list of ordered IOVA ranges (at least one range)
- *
- * Returns 0 on success, or a negative error. In case of failure, the error
- * object must be created.
- */
- int (*iommu_set_iova_ranges)(IOMMUMemoryRegion *iommu,
- GList *iova_ranges,
- Error **errp);
};
typedef struct RamDiscardListener RamDiscardListener;
@@ -1952,18 +1932,6 @@ int memory_region_iommu_set_page_size_mask(IOMMUMemoryRegion *iommu_mr,
Error **errp);
/**
- * memory_region_iommu_set_iova_ranges - Set the usable IOVA ranges
- * for a given IOMMU MR region
- *
- * @iommu: IOMMU memory region
- * @iova_ranges: list of ordered IOVA ranges (at least one range)
- * @errp: pointer to Error*, to store an error if it happens.
- */
-int memory_region_iommu_set_iova_ranges(IOMMUMemoryRegion *iommu,
- GList *iova_ranges,
- Error **errp);
-
-/**
* memory_region_name: get a memory region's name
*
* Returns the string that was used to initialize the memory region.
diff --git a/include/gdbstub/enums.h b/include/gdbstub/enums.h
new file mode 100644
index 0000000..c4d54a1
--- /dev/null
+++ b/include/gdbstub/enums.h
@@ -0,0 +1,21 @@
+/*
+ * gdbstub enums
+ *
+ * Copyright (c) 2024 Linaro Ltd
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef GDBSTUB_ENUMS_H
+#define GDBSTUB_ENUMS_H
+
+/*
+ * Port string for the gdbstub; presumably the default used when no port
+ * is given explicitly -- TODO confirm against the users of this macro.
+ */
+#define DEFAULT_GDBSTUB_PORT "1234"
+
+/* GDB breakpoint/watchpoint types */
+#define GDB_BREAKPOINT_SW 0
+#define GDB_BREAKPOINT_HW 1
+#define GDB_WATCHPOINT_WRITE 2
+#define GDB_WATCHPOINT_READ 3
+#define GDB_WATCHPOINT_ACCESS 4
+
+#endif /* GDBSTUB_ENUMS_H */
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 7fa0a69..1eb05c2 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -292,6 +292,8 @@ struct IntelIOMMUState {
/* list of registered notifiers */
QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers;
+ GHashTable *vtd_host_iommu_dev; /* HostIOMMUDevice */
+
/* interrupt remapping */
bool intr_enabled; /* Whether guest enabled IR */
dma_addr_t intr_root; /* Interrupt remapping table pointer */
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index eaa3fc9..eb26cac 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -3,6 +3,7 @@
#include "exec/memory.h"
#include "sysemu/dma.h"
+#include "sysemu/host_iommu_device.h"
/* PCI includes legacy ISA access. */
#include "hw/isa/isa.h"
@@ -383,10 +384,45 @@ typedef struct PCIIOMMUOps {
*
* @devfn: device and function number
*/
- AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn);
+ AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn);
+ /**
+ * @set_iommu_device: attach a HostIOMMUDevice to a vIOMMU
+ *
+ * Optional callback, if not implemented in vIOMMU, then vIOMMU can't
+ * retrieve host information from the associated HostIOMMUDevice.
+ *
+ * @bus: the #PCIBus of the PCI device.
+ *
+ * @opaque: the data passed to pci_setup_iommu().
+ *
+ * @devfn: device and function number of the PCI device.
+ *
+ * @dev: the #HostIOMMUDevice to attach.
+ *
+ * @errp: pass an Error out only when return false
+ *
+ * Returns: true if HostIOMMUDevice is attached or else false with errp set.
+ */
+ bool (*set_iommu_device)(PCIBus *bus, void *opaque, int devfn,
+ HostIOMMUDevice *dev, Error **errp);
+ /**
+ * @unset_iommu_device: detach a HostIOMMUDevice from a vIOMMU
+ *
+ * Optional callback.
+ *
+ * @bus: the #PCIBus of the PCI device.
+ *
+ * @opaque: the data passed to pci_setup_iommu().
+ *
+ * @devfn: device and function number of the PCI device.
+ */
+ void (*unset_iommu_device)(PCIBus *bus, void *opaque, int devfn);
} PCIIOMMUOps;
AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
+bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod,
+ Error **errp);
+void pci_device_unset_iommu_device(PCIDevice *dev);
/**
* pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus
diff --git a/include/hw/riscv/virt.h b/include/hw/riscv/virt.h
index 3db8391..c0dc41f 100644
--- a/include/hw/riscv/virt.h
+++ b/include/hw/riscv/virt.h
@@ -118,6 +118,7 @@ enum {
#define FDT_PLIC_ADDR_CELLS 0
#define FDT_PLIC_INT_CELLS 1
#define FDT_APLIC_INT_CELLS 2
+#define FDT_APLIC_ADDR_CELLS 0
#define FDT_IMSIC_INT_CELLS 0
#define FDT_MAX_INT_CELLS 2
#define FDT_MAX_INT_MAP_WIDTH (FDT_PCI_ADDR_CELLS + FDT_PCI_INT_CELLS + \
diff --git a/include/hw/s390x/s390-ccw.h b/include/hw/s390x/s390-ccw.h
index 2c807ee..2e0a709 100644
--- a/include/hw/s390x/s390-ccw.h
+++ b/include/hw/s390x/s390-ccw.h
@@ -31,7 +31,7 @@ struct S390CCWDevice {
struct S390CCWDeviceClass {
CCWDeviceClass parent_class;
- void (*realize)(S390CCWDevice *dev, char *sysfsdev, Error **errp);
+ bool (*realize)(S390CCWDevice *dev, char *sysfsdev, Error **errp);
void (*unrealize)(S390CCWDevice *dev);
IOInstEnding (*handle_request) (SubchDev *sch);
int (*handle_halt) (SubchDev *sch);
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 4cb1ab8..e8ddf92 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -31,6 +31,8 @@
#endif
#include "sysemu/sysemu.h"
#include "hw/vfio/vfio-container-base.h"
+#include "sysemu/host_iommu_device.h"
+#include "sysemu/iommufd.h"
#define VFIO_MSG_PREFIX "vfio %s: "
@@ -82,6 +84,8 @@ typedef struct VFIOContainer {
QLIST_HEAD(, VFIOGroup) group_list;
} VFIOContainer;
+OBJECT_DECLARE_SIMPLE_TYPE(VFIOContainer, VFIO_IOMMU_LEGACY);
+
typedef struct VFIOHostDMAWindow {
hwaddr min_iova;
hwaddr max_iova;
@@ -97,6 +101,8 @@ typedef struct VFIOIOMMUFDContainer {
uint32_t ioas_id;
} VFIOIOMMUFDContainer;
+OBJECT_DECLARE_SIMPLE_TYPE(VFIOIOMMUFDContainer, VFIO_IOMMU_IOMMUFD);
+
typedef struct VFIODeviceOps VFIODeviceOps;
typedef struct VFIODevice {
@@ -125,6 +131,7 @@ typedef struct VFIODevice {
OnOffAuto pre_copy_dirty_page_tracking;
bool dirty_pages_supported;
bool dirty_tracking;
+ HostIOMMUDevice *hiod;
int devid;
IOMMUFDBackend *iommufd;
} VFIODevice;
@@ -171,6 +178,10 @@ typedef struct VFIOGroup {
bool ram_block_discard_allowed;
} VFIOGroup;
+#define TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO TYPE_HOST_IOMMU_DEVICE "-legacy-vfio"
+#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO \
+ TYPE_HOST_IOMMU_DEVICE_IOMMUFD "-vfio"
+
typedef struct VFIODMABuf {
QemuDmaBuf *buf;
uint32_t pos_x, pos_y, pos_updates;
@@ -199,10 +210,8 @@ typedef struct VFIODisplay {
VFIOAddressSpace *vfio_get_address_space(AddressSpace *as);
void vfio_put_address_space(VFIOAddressSpace *space);
-
-/* SPAPR specific */
-int vfio_spapr_container_init(VFIOContainer *container, Error **errp);
-void vfio_spapr_container_deinit(VFIOContainer *container);
+void vfio_address_space_insert(VFIOAddressSpace *space,
+ VFIOContainerBase *bcontainer);
void vfio_disable_irqindex(VFIODevice *vbasedev, int index);
void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index);
@@ -283,4 +292,5 @@ bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp);
void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
DeviceState *dev, bool ram_discard);
+int vfio_device_get_aw_bits(VFIODevice *vdev);
#endif /* HW_VFIO_VFIO_COMMON_H */
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
index 2776481..419e45e 100644
--- a/include/hw/vfio/vfio-container-base.h
+++ b/include/hw/vfio/vfio-container-base.h
@@ -34,7 +34,7 @@ typedef struct VFIOAddressSpace {
* This is the base object for vfio container backends
*/
typedef struct VFIOContainerBase {
- const VFIOIOMMUClass *ops;
+ Object parent;
VFIOAddressSpace *space;
MemoryListener listener;
Error *error;
@@ -86,28 +86,18 @@ int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer,
int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp);
-void vfio_container_init(VFIOContainerBase *bcontainer,
- VFIOAddressSpace *space,
- const VFIOIOMMUClass *ops);
-void vfio_container_destroy(VFIOContainerBase *bcontainer);
-
-
#define TYPE_VFIO_IOMMU "vfio-iommu"
#define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy"
#define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr"
#define TYPE_VFIO_IOMMU_IOMMUFD TYPE_VFIO_IOMMU "-iommufd"
-/*
- * VFIOContainerBase is not an abstract QOM object because it felt
- * unnecessary to expose all the IOMMU backends to the QEMU machine
- * and human interface. However, we can still abstract the IOMMU
- * backend handlers using a QOM interface class. This provides more
- * flexibility when referencing the various implementations.
- */
-DECLARE_CLASS_CHECKERS(VFIOIOMMUClass, VFIO_IOMMU, TYPE_VFIO_IOMMU)
+OBJECT_DECLARE_TYPE(VFIOContainerBase, VFIOIOMMUClass, VFIO_IOMMU)
struct VFIOIOMMUClass {
- InterfaceClass parent_class;
+ ObjectClass parent_class;
+
+ /* Properties */
+ const char *hiod_typename;
/* basic feature */
bool (*setup)(VFIOContainerBase *bcontainer, Error **errp);
diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h
index 83a52cc..bdb3da7 100644
--- a/include/hw/virtio/virtio-iommu.h
+++ b/include/hw/virtio/virtio-iommu.h
@@ -25,6 +25,7 @@
#include "hw/pci/pci.h"
#include "qom/object.h"
#include "qapi/qapi-types-virtio.h"
+#include "sysemu/host_iommu_device.h"
#define TYPE_VIRTIO_IOMMU "virtio-iommu-device"
#define TYPE_VIRTIO_IOMMU_PCI "virtio-iommu-pci"
@@ -57,6 +58,7 @@ struct VirtIOIOMMU {
struct virtio_iommu_config config;
uint64_t features;
GHashTable *as_by_busptr;
+ GHashTable *host_iommu_devices;
IOMMUPciBus *iommu_pcibus_by_bus_num[PCI_BUS_MAX];
PCIBus *primary_bus;
ReservedRegion *prop_resv_regions;
diff --git a/include/qemu/qemu-plugin.h b/include/qemu/qemu-plugin.h
index 95703d8..c71c705 100644
--- a/include/qemu/qemu-plugin.h
+++ b/include/qemu/qemu-plugin.h
@@ -661,6 +661,33 @@ void qemu_plugin_register_vcpu_mem_inline_per_vcpu(
qemu_plugin_u64 entry,
uint64_t imm);
+/**
+ * qemu_plugin_request_time_control() - request the ability to control time
+ *
+ * This grants the plugin the ability to control system time. Only one
+ * plugin can control time, so if multiple plugins request the ability,
+ * all but the first will fail.
+ *
+ * Returns: an opaque handle on success, or NULL if the request fails
+ * (for instance because time control has already been granted).
+ */
+QEMU_PLUGIN_API
+const void *qemu_plugin_request_time_control(void);
+
+/**
+ * qemu_plugin_update_ns() - update system emulation time
+ * @handle: opaque handle returned by qemu_plugin_request_time_control()
+ * @time: time in nanoseconds
+ *
+ * This allows an appropriately authorised plugin (i.e. holding the
+ * time control handle) to move system time forward to @time. Calls made
+ * with a @handle other than that one have no effect. For
+ * user-mode emulation the time is not changed by this as all reported
+ * time comes from the host kernel.
+ *
+ * Start time is 0.
+ */
+QEMU_PLUGIN_API
+void qemu_plugin_update_ns(const void *handle, int64_t time);
+
typedef void
(*qemu_plugin_vcpu_syscall_cb_t)(qemu_plugin_id_t id, unsigned int vcpu_index,
int64_t num, uint64_t a1, uint64_t a2,
diff --git a/include/qemu/range.h b/include/qemu/range.h
index 205e1da..4ce694a 100644
--- a/include/qemu/range.h
+++ b/include/qemu/range.h
@@ -20,6 +20,8 @@
#ifndef QEMU_RANGE_H
#define QEMU_RANGE_H
+#include "qemu/bitops.h"
+
/*
* Operations on 64 bit address ranges.
* Notes:
@@ -217,6 +219,15 @@ static inline int ranges_overlap(uint64_t first1, uint64_t len1,
return !(last2 < first1 || last1 < first2);
}
+/*
+ * Return the position of the highest non-zero bit of @range's upper
+ * bound, or -1 when @range is empty.
+ */
+static inline int range_get_last_bit(Range *range)
+{
+    return range_is_empty(range) ? -1 : 63 - clz64(range->upb);
+}
+
/*
* Return -1 if @a < @b, 1 @a > @b, and 0 if they touch or overlap.
* Both @a and @b must not be empty.
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index 9a366e5..5ce83c7 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -245,6 +245,21 @@ bool qemu_clock_run_timers(QEMUClockType type);
*/
bool qemu_clock_run_all_timers(void);
+/**
+ * qemu_clock_advance_virtual_time(): advance the virtual time tick
+ * @target_ns: target time in nanoseconds
+ *
+ * This function is used where the control of the flow of time has
+ * been delegated to outside the clock subsystem (be it qtest, icount
+ * or some other external source).
+ *
+ * NOTE(review): earlier wording said the clock system could be asked to
+ * return early at the first expired timer, but this prototype takes no
+ * such argument -- confirm the behaviour against the implementation.
+ *
+ * Time can only move forward, attempts to reverse time would lead to
+ * an error.
+ *
+ * Returns: new virtual time.
+ */
+int64_t qemu_clock_advance_virtual_time(int64_t target_ns);
/*
* QEMUTimerList
diff --git a/include/sysemu/accel-ops.h b/include/sysemu/accel-ops.h
index ef91fc2..a088672 100644
--- a/include/sysemu/accel-ops.h
+++ b/include/sysemu/accel-ops.h
@@ -20,7 +20,12 @@
typedef struct AccelOpsClass AccelOpsClass;
DECLARE_CLASS_CHECKERS(AccelOpsClass, ACCEL_OPS, TYPE_ACCEL_OPS)
-/* cpus.c operations interface */
+/**
+ * struct AccelOpsClass - accelerator interfaces
+ *
+ * This structure is used to abstract accelerator differences from the
+ * core CPU code. Not all have to be implemented.
+ */
struct AccelOpsClass {
/*< private >*/
ObjectClass parent_class;
@@ -44,7 +49,18 @@ struct AccelOpsClass {
void (*handle_interrupt)(CPUState *cpu, int mask);
+ /**
+ * @get_virtual_clock: fetch virtual clock
+ * @set_virtual_clock: set virtual clock
+ *
+ * These allow the timer subsystem to defer to the accelerator to
+ * fetch time. The set function is needed if the accelerator wants
+ * to track the changes to time as the timer is warped through
+ * various timer events.
+ */
int64_t (*get_virtual_clock)(void);
+ void (*set_virtual_clock)(int64_t time);
+
int64_t (*get_elapsed_ticks)(void);
/* gdbstub hooks */
diff --git a/include/sysemu/cpu-timers.h b/include/sysemu/cpu-timers.h
index d86738a..7bfa960 100644
--- a/include/sysemu/cpu-timers.h
+++ b/include/sysemu/cpu-timers.h
@@ -96,8 +96,9 @@ int64_t cpu_get_clock(void);
void qemu_timer_notify_cb(void *opaque, QEMUClockType type);
-/* get the VIRTUAL clock and VM elapsed ticks via the cpus accel interface */
+/* get/set VIRTUAL clock and VM elapsed ticks via the cpus accel interface */
int64_t cpus_get_virtual_clock(void);
+void cpus_set_virtual_clock(int64_t new_time);
int64_t cpus_get_elapsed_ticks(void);
#endif /* SYSEMU_CPU_TIMERS_H */
diff --git a/include/sysemu/host_iommu_device.h b/include/sysemu/host_iommu_device.h
new file mode 100644
index 0000000..ee6c813
--- /dev/null
+++ b/include/sysemu/host_iommu_device.h
@@ -0,0 +1,102 @@
+/*
+ * Host IOMMU device abstract declaration
+ *
+ * Copyright (C) 2024 Intel Corporation.
+ *
+ * Authors: Zhenzhong Duan <zhenzhong.duan@intel.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef HOST_IOMMU_DEVICE_H
+#define HOST_IOMMU_DEVICE_H
+
+#include "qom/object.h"
+#include "qapi/error.h"
+
+/**
+ * struct HostIOMMUDeviceCaps - Define host IOMMU device capabilities.
+ *
+ * @type: host platform IOMMU type.
+ *
+ * @aw_bits: host IOMMU address width. 0xff if no limitation.
+ *           NOTE(review): this header also defines
+ *           HOST_IOMMU_DEVICE_CAP_AW_BITS_MAX as 64; confirm which value
+ *           implementations actually report when there is no limitation.
+ */
+typedef struct HostIOMMUDeviceCaps {
+ uint32_t type;
+ uint8_t aw_bits;
+} HostIOMMUDeviceCaps;
+
+#define TYPE_HOST_IOMMU_DEVICE "host-iommu-device"
+OBJECT_DECLARE_TYPE(HostIOMMUDevice, HostIOMMUDeviceClass, HOST_IOMMU_DEVICE)
+
+/*
+ * Instance state of a host IOMMU device.
+ *
+ * @name: presumably a human-readable identifier -- TODO confirm who sets it
+ * @agent: pointer to the agent device (see the field comment)
+ * @aliased_bus, @aliased_devfn: bus and devfn of the alias of the device;
+ *     the virtio-iommu set_iommu_device callback applies the host IOVA
+ *     ranges to the IOMMU device found at this location
+ * @caps: capabilities of the host IOMMU device
+ */
+struct HostIOMMUDevice {
+ Object parent_obj;
+
+ char *name;
+ void *agent; /* pointer to agent device, ie. VFIO or VDPA device */
+ PCIBus *aliased_bus;
+ int aliased_devfn;
+ HostIOMMUDeviceCaps caps;
+};
+
+/**
+ * struct HostIOMMUDeviceClass - The base class for all host IOMMU devices.
+ *
+ * Different types of host devices (e.g., VFIO or VDPA device) or devices
+ * with different backend (e.g., VFIO legacy container or IOMMUFD backend)
+ * will have different implementations of the HostIOMMUDeviceClass.
+ */
+struct HostIOMMUDeviceClass {
+ ObjectClass parent_class;
+
+ /**
+ * @realize: initialize host IOMMU device instance further.
+ *
+ * Mandatory callback.
+ *
+ * @hiod: pointer to a host IOMMU device instance.
+ *
+ * @opaque: pointer to agent device of this host IOMMU device,
+ * e.g., VFIO base device or VDPA device.
+ *
+ * @errp: pass an Error out when realize fails.
+ *
+ * Returns: true on success, false on failure.
+ */
+ bool (*realize)(HostIOMMUDevice *hiod, void *opaque, Error **errp);
+ /**
+ * @get_cap: check if a host IOMMU device capability is supported.
+ *
+ * Optional callback, if not implemented, hint not supporting query
+ * of @cap.
+ *
+ * @hiod: pointer to a host IOMMU device instance.
+ *
+ * @cap: capability to check.
+ *
+ * @errp: pass an Error out when fails to query capability.
+ *
+ * Returns: <0 on failure, 0 if a @cap is unsupported, or else
+ * 1 or some positive value for some special @cap,
+ * i.e., HOST_IOMMU_DEVICE_CAP_AW_BITS.
+ */
+ int (*get_cap)(HostIOMMUDevice *hiod, int cap, Error **errp);
+ /**
+ * @get_iova_ranges: return the list of usable IOVA ranges of the
+ * host IOMMU device @hiod.
+ *
+ * Optional callback.
+ *
+ * @hiod: handle to the host IOMMU device
+ * @errp: error handle
+ *
+ * Returns: a list of ranges, or NULL when none is available. The
+ * virtio-iommu caller treats NULL as "not supported" and releases a
+ * non-NULL list with g_list_free_full(list, g_free), so the list and
+ * its elements are expected to be owned by the caller.
+ */
+ GList* (*get_iova_ranges)(HostIOMMUDevice *hiod, Error **errp);
+};
+
+/*
+ * Host IOMMU device capability list.
+ */
+#define HOST_IOMMU_DEVICE_CAP_IOMMU_TYPE 0
+#define HOST_IOMMU_DEVICE_CAP_AW_BITS 1
+
+#define HOST_IOMMU_DEVICE_CAP_AW_BITS_MAX 64
+#endif
diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
index 293bfbe..9edfec6 100644
--- a/include/sysemu/iommufd.h
+++ b/include/sysemu/iommufd.h
@@ -1,9 +1,23 @@
+/*
+ * iommufd container backend declaration
+ *
+ * Copyright (C) 2024 Intel Corporation.
+ * Copyright Red Hat, Inc. 2024
+ *
+ * Authors: Yi Liu <yi.l.liu@intel.com>
+ * Eric Auger <eric.auger@redhat.com>
+ * Zhenzhong Duan <zhenzhong.duan@intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
#ifndef SYSEMU_IOMMUFD_H
#define SYSEMU_IOMMUFD_H
#include "qom/object.h"
#include "exec/hwaddr.h"
#include "exec/cpu-common.h"
+#include "sysemu/host_iommu_device.h"
#define TYPE_IOMMUFD_BACKEND "iommufd"
OBJECT_DECLARE_TYPE(IOMMUFDBackend, IOMMUFDBackendClass, IOMMUFD_BACKEND)
@@ -33,4 +47,9 @@ int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova,
ram_addr_t size, void *vaddr, bool readonly);
int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
hwaddr iova, ram_addr_t size);
+bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
+ uint32_t *type, void *data, uint32_t len,
+ Error **errp);
+
+#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"
#endif
diff --git a/include/sysemu/qtest.h b/include/sysemu/qtest.h
index b5d5fd3..c161d75 100644
--- a/include/sysemu/qtest.h
+++ b/include/sysemu/qtest.h
@@ -34,8 +34,6 @@ void qtest_server_init(const char *qtest_chrdev, const char *qtest_log, Error **
void qtest_server_set_send_handler(void (*send)(void *, const char *),
void *opaque);
void qtest_server_inproc_recv(void *opaque, const char *buf);
-
-int64_t qtest_get_virtual_clock(void);
#endif
#endif
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 45ee3a9..f601d06 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -15,8 +15,9 @@
#include "qemu/osdep.h"
#include "exec/address-spaces.h"
-#include "exec/gdbstub.h"
#include "exec/ioport.h"
+#include "exec/gdbstub.h"
+#include "gdbstub/enums.h"
#include "monitor/hmp.h"
#include "qemu/help_option.h"
#include "monitor/monitor-internal.h"
diff --git a/plugins/api.c b/plugins/api.c
index 5a0a7f8..2ff13d0 100644
--- a/plugins/api.c
+++ b/plugins/api.c
@@ -39,6 +39,7 @@
#include "qemu/main-loop.h"
#include "qemu/plugin.h"
#include "qemu/log.h"
+#include "qemu/timer.h"
#include "tcg/tcg.h"
#include "exec/exec-all.h"
#include "exec/gdbstub.h"
@@ -46,6 +47,8 @@
#include "disas/disas.h"
#include "plugin.h"
#ifndef CONFIG_USER_ONLY
+#include "qapi/error.h"
+#include "migration/blocker.h"
#include "exec/ram_addr.h"
#include "qemu/plugin-memory.h"
#include "hw/boards.h"
@@ -507,7 +510,7 @@ static GArray *create_register_handles(GArray *gdbstub_regs)
}
/* Create a record for the plugin */
- desc.handle = GINT_TO_POINTER(grd->gdb_reg);
+ desc.handle = GINT_TO_POINTER(grd->gdb_reg + 1);
desc.name = g_intern_string(grd->name);
desc.feature = g_intern_string(grd->feature_name);
g_array_append_val(find_data, desc);
@@ -528,7 +531,7 @@ int qemu_plugin_read_register(struct qemu_plugin_register *reg, GByteArray *buf)
{
g_assert(current_cpu);
- return gdb_read_register(current_cpu, buf, GPOINTER_TO_INT(reg));
+ return gdb_read_register(current_cpu, buf, GPOINTER_TO_INT(reg) - 1);
}
struct qemu_plugin_scoreboard *qemu_plugin_scoreboard_new(size_t element_size)
@@ -583,3 +586,45 @@ uint64_t qemu_plugin_u64_sum(qemu_plugin_u64 entry)
}
return total;
}
+
+/*
+ * Time control
+ */
+static bool has_control;
+#ifdef CONFIG_SOFTMMU
+static Error *migration_blocker;
+#endif
+
+const void *qemu_plugin_request_time_control(void)
+{
+ if (!has_control) {
+ has_control = true;
+#ifdef CONFIG_SOFTMMU
+ error_setg(&migration_blocker,
+ "TCG plugin time control does not support migration");
+ migrate_add_blocker(&migration_blocker, NULL);
+#endif
+ return &has_control;
+ }
+ return NULL;
+}
+
+#ifdef CONFIG_SOFTMMU
+static void advance_virtual_time__async(CPUState *cpu, run_on_cpu_data data)
+{
+ int64_t new_time = data.host_ulong;
+ qemu_clock_advance_virtual_time(new_time);
+}
+#endif
+
+void qemu_plugin_update_ns(const void *handle, int64_t new_time)
+{
+#ifdef CONFIG_SOFTMMU
+ if (handle == &has_control) {
+ /* Need to execute out of cpu_exec, so bql can be locked. */
+ async_run_on_cpu(current_cpu,
+ advance_virtual_time__async,
+ RUN_ON_CPU_HOST_ULONG(new_time));
+ }
+#endif
+}
diff --git a/plugins/core.c b/plugins/core.c
index badede2..9d737d8 100644
--- a/plugins/core.c
+++ b/plugins/core.c
@@ -589,7 +589,7 @@ void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr,
switch (cb->type) {
case PLUGIN_CB_MEM_REGULAR:
- if (rw && cb->regular.rw) {
+ if (rw & cb->regular.rw) {
cb->regular.f.vcpu_mem(cpu->cpu_index,
make_plugin_meminfo(oi, rw),
vaddr, cb->regular.userp);
@@ -597,7 +597,7 @@ void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr,
break;
case PLUGIN_CB_INLINE_ADD_U64:
case PLUGIN_CB_INLINE_STORE_U64:
- if (rw && cb->inline_insn.rw) {
+ if (rw & cb->inline_insn.rw) {
exec_inline_op(cb->type, &cb->inline_insn, cpu->cpu_index);
}
break;
diff --git a/plugins/qemu-plugins.symbols b/plugins/qemu-plugins.symbols
index aa0a77a..ca773d8 100644
--- a/plugins/qemu-plugins.symbols
+++ b/plugins/qemu-plugins.symbols
@@ -38,6 +38,7 @@
qemu_plugin_register_vcpu_tb_exec_cond_cb;
qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu;
qemu_plugin_register_vcpu_tb_trans_cb;
+ qemu_plugin_request_time_control;
qemu_plugin_reset;
qemu_plugin_scoreboard_free;
qemu_plugin_scoreboard_find;
@@ -51,5 +52,6 @@
qemu_plugin_u64_set;
qemu_plugin_u64_sum;
qemu_plugin_uninstall;
+ qemu_plugin_update_ns;
qemu_plugin_vcpu_for_each;
};
diff --git a/stubs/cpus-get-virtual-clock.c b/stubs/cpus-virtual-clock.c
index fd447d5..af7c1a1 100644
--- a/stubs/cpus-get-virtual-clock.c
+++ b/stubs/cpus-virtual-clock.c
@@ -6,3 +6,8 @@ int64_t cpus_get_virtual_clock(void)
{
return cpu_get_clock();
}
+
+void cpus_set_virtual_clock(int64_t new_time)
+{
+ /* do nothing */
+}
diff --git a/stubs/meson.build b/stubs/meson.build
index f15b48d..772a3e8 100644
--- a/stubs/meson.build
+++ b/stubs/meson.build
@@ -29,7 +29,7 @@ endif
if have_block or have_ga
stub_ss.add(files('replay-tools.c'))
# stubs for hooks in util/main-loop.c, util/async.c etc.
- stub_ss.add(files('cpus-get-virtual-clock.c'))
+ stub_ss.add(files('cpus-virtual-clock.c'))
stub_ss.add(files('icount.c'))
stub_ss.add(files('graph-lock.c'))
if linux_io_uring.found()
diff --git a/system/cpus.c b/system/cpus.c
index f8fa78f..d3640c9 100644
--- a/system/cpus.c
+++ b/system/cpus.c
@@ -230,6 +230,17 @@ int64_t cpus_get_virtual_clock(void)
}
/*
+ * Signal the new virtual time to the accelerator. This is only needed
+ * by accelerators that need to track the changes as we warp time.
+ */
+void cpus_set_virtual_clock(int64_t new_time)
+{
+ if (cpus_accel && cpus_accel->set_virtual_clock) {
+ cpus_accel->set_virtual_clock(new_time);
+ }
+}
+
+/*
* return the time elapsed in VM between vm_start and vm_stop. Unless
* icount is active, cpus_get_elapsed_ticks() uses units of the host CPU cycle
* counter.
diff --git a/system/memory.c b/system/memory.c
index 47c600d..2d69521 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -1914,19 +1914,6 @@ int memory_region_iommu_set_page_size_mask(IOMMUMemoryRegion *iommu_mr,
return ret;
}
-int memory_region_iommu_set_iova_ranges(IOMMUMemoryRegion *iommu_mr,
- GList *iova_ranges,
- Error **errp)
-{
- IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr);
- int ret = 0;
-
- if (imrc->iommu_set_iova_ranges) {
- ret = imrc->iommu_set_iova_ranges(iommu_mr, iova_ranges, errp);
- }
- return ret;
-}
-
int memory_region_register_iommu_notifier(MemoryRegion *mr,
IOMMUNotifier *n, Error **errp)
{
diff --git a/system/qtest.c b/system/qtest.c
index 507a358..12703a2 100644
--- a/system/qtest.c
+++ b/system/qtest.c
@@ -325,38 +325,6 @@ static void qtest_irq_handler(void *opaque, int n, int level)
}
}
-static int64_t qtest_clock_counter;
-
-int64_t qtest_get_virtual_clock(void)
-{
- return qatomic_read_i64(&qtest_clock_counter);
-}
-
-static void qtest_set_virtual_clock(int64_t count)
-{
- qatomic_set_i64(&qtest_clock_counter, count);
-}
-
-static void qtest_clock_warp(int64_t dest)
-{
- int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
- AioContext *aio_context;
- assert(qtest_enabled());
- aio_context = qemu_get_aio_context();
- while (clock < dest) {
- int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
- QEMU_TIMER_ATTR_ALL);
- int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
-
- qtest_set_virtual_clock(qtest_get_virtual_clock() + warp);
-
- qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
- timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
- clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
- }
- qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
-}
-
static bool (*process_command_cb)(CharBackend *chr, gchar **words);
void qtest_set_command_cb(bool (*pc_cb)(CharBackend *chr, gchar **words))
@@ -751,7 +719,8 @@ static void qtest_process_command(CharBackend *chr, gchar **words)
ns = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
QEMU_TIMER_ATTR_ALL);
}
- qtest_clock_warp(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + ns);
+ qemu_clock_advance_virtual_time(
+ qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + ns);
qtest_send_prefix(chr);
qtest_sendf(chr, "OK %"PRIi64"\n",
(int64_t)qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
@@ -777,7 +746,7 @@ static void qtest_process_command(CharBackend *chr, gchar **words)
g_assert(words[1]);
ret = qemu_strtoi64(words[1], NULL, 0, &ns);
g_assert(ret == 0);
- qtest_clock_warp(ns);
+ qemu_clock_advance_virtual_time(ns);
qtest_send_prefix(chr);
qtest_sendf(chr, "OK %"PRIi64"\n",
(int64_t)qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
diff --git a/system/vl.c b/system/vl.c
index a3eede5..cfcb674 100644
--- a/system/vl.c
+++ b/system/vl.c
@@ -68,6 +68,7 @@
#include "sysemu/numa.h"
#include "sysemu/hostmem.h"
#include "exec/gdbstub.h"
+#include "gdbstub/enums.h"
#include "qemu/timer.h"
#include "chardev/char.h"
#include "qemu/bitmap.h"
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index 45e2218..ef9bc42 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -33,7 +33,7 @@
#include "trace/trace-target_arm_hvf.h"
#include "migration/vmstate.h"
-#include "exec/gdbstub.h"
+#include "gdbstub/enums.h"
#define MDSCR_EL1_SS_SHIFT 0
#define MDSCR_EL1_MDE_SHIFT 15
diff --git a/target/arm/hyp_gdbstub.c b/target/arm/hyp_gdbstub.c
index ebde289..f120d55 100644
--- a/target/arm/hyp_gdbstub.c
+++ b/target/arm/hyp_gdbstub.c
@@ -12,7 +12,7 @@
#include "qemu/osdep.h"
#include "cpu.h"
#include "internals.h"
-#include "exec/gdbstub.h"
+#include "gdbstub/enums.h"
/* Maximum and current break/watch point counts */
int max_hw_bps, max_hw_wps;
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 7cf5cf3..70f79ed 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -31,7 +31,7 @@
#include "hw/pci/pci.h"
#include "exec/memattrs.h"
#include "exec/address-spaces.h"
-#include "exec/gdbstub.h"
+#include "gdbstub/enums.h"
#include "hw/boards.h"
#include "hw/irq.h"
#include "qapi/visitor.h"
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 7ad8072..dd8b0f3 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -38,7 +38,7 @@
#include "hyperv.h"
#include "hyperv-proto.h"
-#include "exec/gdbstub.h"
+#include "gdbstub/enums.h"
#include "qemu/host-utils.h"
#include "qemu/main-loop.h"
#include "qemu/ratelimit.h"
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 005f223..2c39322 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -39,7 +39,7 @@
#include "migration/qemu-file-types.h"
#include "sysemu/watchdog.h"
#include "trace.h"
-#include "exec/gdbstub.h"
+#include "gdbstub/enums.h"
#include "exec/memattrs.h"
#include "exec/ram_addr.h"
#include "sysemu/hostmem.h"
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 69a08e8..a2640cf 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -1779,7 +1779,9 @@ static int priv_spec_from_str(const char *priv_spec_str)
{
int priv_version = -1;
- if (!g_strcmp0(priv_spec_str, PRIV_VER_1_12_0_STR)) {
+ if (!g_strcmp0(priv_spec_str, PRIV_VER_1_13_0_STR)) {
+ priv_version = PRIV_VERSION_1_13_0;
+ } else if (!g_strcmp0(priv_spec_str, PRIV_VER_1_12_0_STR)) {
priv_version = PRIV_VERSION_1_12_0;
} else if (!g_strcmp0(priv_spec_str, PRIV_VER_1_11_0_STR)) {
priv_version = PRIV_VERSION_1_11_0;
@@ -1790,7 +1792,7 @@ static int priv_spec_from_str(const char *priv_spec_str)
return priv_version;
}
-static const char *priv_spec_to_str(int priv_version)
+const char *priv_spec_to_str(int priv_version)
{
switch (priv_version) {
case PRIV_VERSION_1_10_0:
@@ -1799,6 +1801,8 @@ static const char *priv_spec_to_str(int priv_version)
return PRIV_VER_1_11_0_STR;
case PRIV_VERSION_1_12_0:
return PRIV_VER_1_12_0_STR;
+ case PRIV_VERSION_1_13_0:
+ return PRIV_VER_1_13_0_STR;
default:
return NULL;
}
@@ -2246,6 +2250,402 @@ RISCVCPUProfile *riscv_profiles[] = {
NULL,
};
+static RISCVCPUImpliedExtsRule RVA_IMPLIED = {
+ .is_misa = true,
+ .ext = RVA,
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zalrsc), CPU_CFG_OFFSET(ext_zaamo),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule RVD_IMPLIED = {
+ .is_misa = true,
+ .ext = RVD,
+ .implied_misa_exts = RVF,
+ .implied_multi_exts = { RISCV_IMPLIED_EXTS_RULE_END },
+};
+
+static RISCVCPUImpliedExtsRule RVF_IMPLIED = {
+ .is_misa = true,
+ .ext = RVF,
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zicsr),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule RVM_IMPLIED = {
+ .is_misa = true,
+ .ext = RVM,
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zmmul),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule RVV_IMPLIED = {
+ .is_misa = true,
+ .ext = RVV,
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zve64d),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZCB_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zcb),
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zca),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZCD_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zcd),
+ .implied_misa_exts = RVD,
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zca),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZCE_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zce),
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zcb), CPU_CFG_OFFSET(ext_zcmp),
+ CPU_CFG_OFFSET(ext_zcmt),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZCF_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zcf),
+ .implied_misa_exts = RVF,
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zca),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZCMP_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zcmp),
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zca),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZCMT_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zcmt),
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zca), CPU_CFG_OFFSET(ext_zicsr),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZDINX_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zdinx),
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zfinx),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZFA_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zfa),
+ .implied_misa_exts = RVF,
+ .implied_multi_exts = { RISCV_IMPLIED_EXTS_RULE_END },
+};
+
+static RISCVCPUImpliedExtsRule ZFBFMIN_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zfbfmin),
+ .implied_misa_exts = RVF,
+ .implied_multi_exts = { RISCV_IMPLIED_EXTS_RULE_END },
+};
+
+static RISCVCPUImpliedExtsRule ZFH_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zfh),
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zfhmin),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZFHMIN_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zfhmin),
+ .implied_misa_exts = RVF,
+ .implied_multi_exts = { RISCV_IMPLIED_EXTS_RULE_END },
+};
+
+static RISCVCPUImpliedExtsRule ZFINX_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zfinx),
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zicsr),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZHINX_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zhinx),
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zhinxmin),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZHINXMIN_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zhinxmin),
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zfinx),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZICNTR_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zicntr),
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zicsr),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZIHPM_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zihpm),
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zicsr),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZK_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zk),
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zkn), CPU_CFG_OFFSET(ext_zkr),
+ CPU_CFG_OFFSET(ext_zkt),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZKN_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zkn),
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zbkb), CPU_CFG_OFFSET(ext_zbkc),
+ CPU_CFG_OFFSET(ext_zbkx), CPU_CFG_OFFSET(ext_zkne),
+ CPU_CFG_OFFSET(ext_zknd), CPU_CFG_OFFSET(ext_zknh),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZKS_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zks),
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zbkb), CPU_CFG_OFFSET(ext_zbkc),
+ CPU_CFG_OFFSET(ext_zbkx), CPU_CFG_OFFSET(ext_zksed),
+ CPU_CFG_OFFSET(ext_zksh),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZVBB_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zvbb),
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zvkb),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZVE32F_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zve32f),
+ .implied_misa_exts = RVF,
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zve32x),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZVE32X_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zve32x),
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zicsr),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZVE64D_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zve64d),
+ .implied_misa_exts = RVD,
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zve64f),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZVE64F_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zve64f),
+ .implied_misa_exts = RVF,
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zve32f), CPU_CFG_OFFSET(ext_zve64x),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZVE64X_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zve64x),
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zve32x),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZVFBFMIN_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zvfbfmin),
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zve32f),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZVFBFWMA_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zvfbfwma),
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zvfbfmin), CPU_CFG_OFFSET(ext_zfbfmin),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZVFH_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zvfh),
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zvfhmin), CPU_CFG_OFFSET(ext_zfhmin),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZVFHMIN_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zvfhmin),
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zve32f),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZVKN_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zvkn),
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zvkned), CPU_CFG_OFFSET(ext_zvknhb),
+ CPU_CFG_OFFSET(ext_zvkb), CPU_CFG_OFFSET(ext_zvkt),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZVKNC_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zvknc),
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zvkn), CPU_CFG_OFFSET(ext_zvbc),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZVKNG_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zvkng),
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zvkn), CPU_CFG_OFFSET(ext_zvkg),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZVKNHB_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zvknhb),
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zve64x),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZVKS_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zvks),
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zvksed), CPU_CFG_OFFSET(ext_zvksh),
+ CPU_CFG_OFFSET(ext_zvkb), CPU_CFG_OFFSET(ext_zvkt),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZVKSC_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zvksc),
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zvks), CPU_CFG_OFFSET(ext_zvbc),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+static RISCVCPUImpliedExtsRule ZVKSG_IMPLIED = {
+ .ext = CPU_CFG_OFFSET(ext_zvksg),
+ .implied_multi_exts = {
+ CPU_CFG_OFFSET(ext_zvks), CPU_CFG_OFFSET(ext_zvkg),
+
+ RISCV_IMPLIED_EXTS_RULE_END
+ },
+};
+
+RISCVCPUImpliedExtsRule *riscv_misa_ext_implied_rules[] = {
+ &RVA_IMPLIED, &RVD_IMPLIED, &RVF_IMPLIED,
+ &RVM_IMPLIED, &RVV_IMPLIED, NULL
+};
+
+RISCVCPUImpliedExtsRule *riscv_multi_ext_implied_rules[] = {
+ &ZCB_IMPLIED, &ZCD_IMPLIED, &ZCE_IMPLIED,
+ &ZCF_IMPLIED, &ZCMP_IMPLIED, &ZCMT_IMPLIED,
+ &ZDINX_IMPLIED, &ZFA_IMPLIED, &ZFBFMIN_IMPLIED,
+ &ZFH_IMPLIED, &ZFHMIN_IMPLIED, &ZFINX_IMPLIED,
+ &ZHINX_IMPLIED, &ZHINXMIN_IMPLIED, &ZICNTR_IMPLIED,
+ &ZIHPM_IMPLIED, &ZK_IMPLIED, &ZKN_IMPLIED,
+ &ZKS_IMPLIED, &ZVBB_IMPLIED, &ZVE32F_IMPLIED,
+ &ZVE32X_IMPLIED, &ZVE64D_IMPLIED, &ZVE64F_IMPLIED,
+ &ZVE64X_IMPLIED, &ZVFBFMIN_IMPLIED, &ZVFBFWMA_IMPLIED,
+ &ZVFH_IMPLIED, &ZVFHMIN_IMPLIED, &ZVKN_IMPLIED,
+ &ZVKNC_IMPLIED, &ZVKNG_IMPLIED, &ZVKNHB_IMPLIED,
+ &ZVKS_IMPLIED, &ZVKSC_IMPLIED, &ZVKSG_IMPLIED,
+ NULL
+};
+
static Property riscv_cpu_properties[] = {
DEFINE_PROP_BOOL("debug", RISCVCPU, cfg.debug, true),
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 6fe0d71..8774204 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -96,12 +96,14 @@ extern RISCVCPUProfile *riscv_profiles[];
#define PRIV_VER_1_10_0_STR "v1.10.0"
#define PRIV_VER_1_11_0_STR "v1.11.0"
#define PRIV_VER_1_12_0_STR "v1.12.0"
+#define PRIV_VER_1_13_0_STR "v1.13.0"
enum {
PRIV_VERSION_1_10_0 = 0,
PRIV_VERSION_1_11_0,
PRIV_VERSION_1_12_0,
+ PRIV_VERSION_1_13_0,
- PRIV_VERSION_LATEST = PRIV_VERSION_1_12_0,
+ PRIV_VERSION_LATEST = PRIV_VERSION_1_13_0,
};
#define VEXT_VERSION_1_00_0 0x00010000
@@ -122,6 +124,29 @@ typedef enum {
EXT_STATUS_DIRTY,
} RISCVExtStatus;
+typedef struct riscv_cpu_implied_exts_rule {
+#ifndef CONFIG_USER_ONLY
+ /*
+ * Bitmask indicates the rule enabled status for the harts.
+ * This enhancement is only available in system-mode QEMU,
+ * as we don't have a good way (e.g. mhartid) to distinguish
+ * the SMP cores in user-mode QEMU.
+ */
+ unsigned long *enabled;
+#endif
+ /* True if this is a MISA implied rule. */
+ bool is_misa;
+ /* ext is MISA bit if is_misa flag is true, else multi extension offset. */
+ const uint32_t ext;
+ const uint32_t implied_misa_exts;
+ const uint32_t implied_multi_exts[];
+} RISCVCPUImpliedExtsRule;
+
+extern RISCVCPUImpliedExtsRule *riscv_misa_ext_implied_rules[];
+extern RISCVCPUImpliedExtsRule *riscv_multi_ext_implied_rules[];
+
+#define RISCV_IMPLIED_EXTS_RULE_END -1
+
#define MMU_USER_IDX 3
#define MAX_RISCV_PMPS (16)
@@ -830,4 +855,5 @@ const char *satp_mode_str(uint8_t satp_mode, bool is_32_bit);
/* Implemented in th_csr.c */
void th_register_custom_csrs(RISCVCPU *cpu);
+const char *priv_spec_to_str(int priv_version);
#endif /* RISCV_CPU_H */
diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
index 74318a9..c257c5e 100644
--- a/target/riscv/cpu_bits.h
+++ b/target/riscv/cpu_bits.h
@@ -156,6 +156,8 @@
/* 32-bit only */
#define CSR_MSTATUSH 0x310
+#define CSR_MEDELEGH 0x312
+#define CSR_HEDELEGH 0x612
/* Machine Trap Handling */
#define CSR_MSCRATCH 0x340
@@ -315,6 +317,7 @@
#define SMSTATEEN0_CS (1ULL << 0)
#define SMSTATEEN0_FCSR (1ULL << 1)
#define SMSTATEEN0_JVT (1ULL << 2)
+#define SMSTATEEN0_P1P13 (1ULL << 56)
#define SMSTATEEN0_HSCONTXT (1ULL << 57)
#define SMSTATEEN0_IMSIC (1ULL << 58)
#define SMSTATEEN0_AIA (1ULL << 59)
@@ -670,6 +673,8 @@ typedef enum RISCVException {
RISCV_EXCP_INST_PAGE_FAULT = 0xc, /* since: priv-1.10.0 */
RISCV_EXCP_LOAD_PAGE_FAULT = 0xd, /* since: priv-1.10.0 */
RISCV_EXCP_STORE_PAGE_FAULT = 0xf, /* since: priv-1.10.0 */
+ RISCV_EXCP_SW_CHECK = 0x12, /* since: priv-1.13.0 */
+ RISCV_EXCP_HW_ERR = 0x13, /* since: priv-1.13.0 */
RISCV_EXCP_INST_GUEST_PAGE_FAULT = 0x14,
RISCV_EXCP_LOAD_GUEST_ACCESS_FAULT = 0x15,
RISCV_EXCP_VIRT_INSTRUCTION_FAULT = 0x16,
@@ -695,7 +700,8 @@ typedef enum RISCVException {
#define IRQ_M_EXT 11
#define IRQ_S_GEXT 12
#define IRQ_PMU_OVF 13
-#define IRQ_LOCAL_MAX 16
+#define IRQ_LOCAL_MAX 64
+/* -1 is due to bit zero of hgeip and hgeie being ROZ. */
#define IRQ_LOCAL_GUEST_MAX (TARGET_LONG_BITS - 1)
/* mip masks */
diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
index e1e4f32..fb7eebd 100644
--- a/target/riscv/cpu_cfg.h
+++ b/target/riscv/cpu_cfg.h
@@ -136,6 +136,7 @@ struct RISCVCPUConfig {
* TCG always implement/can't be user disabled,
* based on spec version.
*/
+ bool has_priv_1_13;
bool has_priv_1_12;
bool has_priv_1_11;
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index 58ef707..432c59d 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -762,14 +762,18 @@ static RISCVException write_vcsr(CPURISCVState *env, int csrno,
}
/* User Timers and Counters */
-static target_ulong get_ticks(bool shift)
+static target_ulong get_ticks(bool shift, bool instructions)
{
int64_t val;
target_ulong result;
#if !defined(CONFIG_USER_ONLY)
if (icount_enabled()) {
- val = icount_get();
+ if (instructions) {
+ val = icount_get_raw();
+ } else {
+ val = icount_get();
+ }
} else {
val = cpu_get_host_ticks();
}
@@ -804,14 +808,14 @@ static RISCVException read_timeh(CPURISCVState *env, int csrno,
static RISCVException read_hpmcounter(CPURISCVState *env, int csrno,
target_ulong *val)
{
- *val = get_ticks(false);
+ *val = get_ticks(false, (csrno == CSR_INSTRET));
return RISCV_EXCP_NONE;
}
static RISCVException read_hpmcounterh(CPURISCVState *env, int csrno,
target_ulong *val)
{
- *val = get_ticks(true);
+ *val = get_ticks(true, (csrno == CSR_INSTRETH));
return RISCV_EXCP_NONE;
}
@@ -875,11 +879,11 @@ static RISCVException write_mhpmcounter(CPURISCVState *env, int csrno,
int ctr_idx = csrno - CSR_MCYCLE;
PMUCTRState *counter = &env->pmu_ctrs[ctr_idx];
uint64_t mhpmctr_val = val;
+ bool instr = riscv_pmu_ctr_monitor_instructions(env, ctr_idx);
counter->mhpmcounter_val = val;
- if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) ||
- riscv_pmu_ctr_monitor_instructions(env, ctr_idx)) {
- counter->mhpmcounter_prev = get_ticks(false);
+ if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) || instr) {
+ counter->mhpmcounter_prev = get_ticks(false, instr);
if (ctr_idx > 2) {
if (riscv_cpu_mxl(env) == MXL_RV32) {
mhpmctr_val = mhpmctr_val |
@@ -902,12 +906,12 @@ static RISCVException write_mhpmcounterh(CPURISCVState *env, int csrno,
PMUCTRState *counter = &env->pmu_ctrs[ctr_idx];
uint64_t mhpmctr_val = counter->mhpmcounter_val;
uint64_t mhpmctrh_val = val;
+ bool instr = riscv_pmu_ctr_monitor_instructions(env, ctr_idx);
counter->mhpmcounterh_val = val;
mhpmctr_val = mhpmctr_val | (mhpmctrh_val << 32);
- if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) ||
- riscv_pmu_ctr_monitor_instructions(env, ctr_idx)) {
- counter->mhpmcounterh_prev = get_ticks(true);
+ if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) || instr) {
+ counter->mhpmcounterh_prev = get_ticks(true, instr);
if (ctr_idx > 2) {
riscv_pmu_setup_timer(env, mhpmctr_val, ctr_idx);
}
@@ -926,6 +930,7 @@ static RISCVException riscv_pmu_read_ctr(CPURISCVState *env, target_ulong *val,
counter->mhpmcounter_prev;
target_ulong ctr_val = upper_half ? counter->mhpmcounterh_val :
counter->mhpmcounter_val;
+ bool instr = riscv_pmu_ctr_monitor_instructions(env, ctr_idx);
if (get_field(env->mcountinhibit, BIT(ctr_idx))) {
/*
@@ -946,9 +951,8 @@ static RISCVException riscv_pmu_read_ctr(CPURISCVState *env, target_ulong *val,
* The kernel computes the perf delta by subtracting the current value from
* the value it initialized previously (ctr_val).
*/
- if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) ||
- riscv_pmu_ctr_monitor_instructions(env, ctr_idx)) {
- *val = get_ticks(upper_half) - ctr_prev + ctr_val;
+ if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) || instr) {
+ *val = get_ticks(upper_half, instr) - ctr_prev + ctr_val;
} else {
*val = ctr_val;
}
@@ -1145,7 +1149,14 @@ static RISCVException write_stimecmph(CPURISCVState *env, int csrno,
#define VSTOPI_NUM_SRCS 5
-#define LOCAL_INTERRUPTS (~0x1FFF)
+/*
+ * All core local interrupts except the fixed ones 0:12. This macro is for
+ * the virtual interrupt logic, so please don't change it, to avoid breaking
+ * that support. For reference see the AIA spec: `5.3 Interrupt filtering and
+ * virtual interrupts for supervisor level` and `6.3.2 Virtual interrupts for
+ * VS level`.
+ */
+#define LOCAL_INTERRUPTS (~0x1FFFULL)
static const uint64_t delegable_ints =
S_MODE_INTERRUPTS | VS_MODE_INTERRUPTS | MIP_LCOFIP;
@@ -1197,18 +1208,18 @@ static const target_ulong sstatus_v1_10_mask = SSTATUS_SIE | SSTATUS_SPIE |
*/
/* Bit STIP can be an alias of mip.STIP that's why it's writable in mvip. */
-static const target_ulong mvip_writable_mask = MIP_SSIP | MIP_STIP | MIP_SEIP |
+static const uint64_t mvip_writable_mask = MIP_SSIP | MIP_STIP | MIP_SEIP |
LOCAL_INTERRUPTS;
-static const target_ulong mvien_writable_mask = MIP_SSIP | MIP_SEIP |
+static const uint64_t mvien_writable_mask = MIP_SSIP | MIP_SEIP |
LOCAL_INTERRUPTS;
-static const target_ulong sip_writable_mask = SIP_SSIP | LOCAL_INTERRUPTS;
-static const target_ulong hip_writable_mask = MIP_VSSIP;
-static const target_ulong hvip_writable_mask = MIP_VSSIP | MIP_VSTIP |
+static const uint64_t sip_writable_mask = SIP_SSIP | LOCAL_INTERRUPTS;
+static const uint64_t hip_writable_mask = MIP_VSSIP;
+static const uint64_t hvip_writable_mask = MIP_VSSIP | MIP_VSTIP |
MIP_VSEIP | LOCAL_INTERRUPTS;
-static const target_ulong hvien_writable_mask = LOCAL_INTERRUPTS;
+static const uint64_t hvien_writable_mask = LOCAL_INTERRUPTS;
-static const target_ulong vsip_writable_mask = MIP_VSSIP | LOCAL_INTERRUPTS;
+static const uint64_t vsip_writable_mask = MIP_VSSIP | LOCAL_INTERRUPTS;
const bool valid_vm_1_10_32[16] = {
[VM_1_10_MBARE] = true,
@@ -2245,6 +2256,10 @@ static RISCVException write_mstateen0(CPURISCVState *env, int csrno,
wr_mask |= SMSTATEEN0_FCSR;
}
+ if (env->priv_ver >= PRIV_VERSION_1_13_0) {
+ wr_mask |= SMSTATEEN0_P1P13;
+ }
+
return write_mstateen(env, csrno, wr_mask, new_val);
}
@@ -2280,6 +2295,10 @@ static RISCVException write_mstateen0h(CPURISCVState *env, int csrno,
{
uint64_t wr_mask = SMSTATEEN_STATEEN | SMSTATEEN0_HSENVCFG;
+ if (env->priv_ver >= PRIV_VERSION_1_13_0) {
+ wr_mask |= SMSTATEEN0_P1P13;
+ }
+
return write_mstateenh(env, csrno, wr_mask, new_val);
}
@@ -3214,6 +3233,33 @@ static RISCVException write_hedeleg(CPURISCVState *env, int csrno,
return RISCV_EXCP_NONE;
}
+static RISCVException read_hedelegh(CPURISCVState *env, int csrno,
+ target_ulong *val)
+{
+ RISCVException ret;
+ ret = smstateen_acc_ok(env, 0, SMSTATEEN0_P1P13);
+ if (ret != RISCV_EXCP_NONE) {
+ return ret;
+ }
+
+ /* Reserved, now read zero */
+ *val = 0;
+ return RISCV_EXCP_NONE;
+}
+
+static RISCVException write_hedelegh(CPURISCVState *env, int csrno,
+ target_ulong val)
+{
+ RISCVException ret;
+ ret = smstateen_acc_ok(env, 0, SMSTATEEN0_P1P13);
+ if (ret != RISCV_EXCP_NONE) {
+ return ret;
+ }
+
+ /* Reserved, now write ignore */
+ return RISCV_EXCP_NONE;
+}
+
static RISCVException rmw_hvien64(CPURISCVState *env, int csrno,
uint64_t *ret_val,
uint64_t new_val, uint64_t wr_mask)
@@ -4618,6 +4664,10 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
[CSR_MSTATUSH] = { "mstatush", any32, read_mstatush,
write_mstatush },
+ [CSR_MEDELEGH] = { "medelegh", any32, read_zero, write_ignore,
+ .min_priv_ver = PRIV_VERSION_1_13_0 },
+ [CSR_HEDELEGH] = { "hedelegh", hmode32, read_hedelegh, write_hedelegh,
+ .min_priv_ver = PRIV_VERSION_1_13_0 },
/* Machine Trap Handling */
[CSR_MSCRATCH] = { "mscratch", any, read_mscratch, write_mscratch,
diff --git a/target/riscv/debug.c b/target/riscv/debug.c
index b110370..0b5099f 100644
--- a/target/riscv/debug.c
+++ b/target/riscv/debug.c
@@ -241,6 +241,76 @@ static void do_trigger_action(CPURISCVState *env, target_ulong trigger_index)
}
}
+/*
+ * Check whether the privilege level of a specific trigger matches the CPU's
+ * current privilege level.
+ */
+static bool trigger_priv_match(CPURISCVState *env, trigger_type_t type,
+ int trigger_index)
+{
+ target_ulong ctrl = env->tdata1[trigger_index];
+
+ switch (type) {
+ case TRIGGER_TYPE_AD_MATCH:
+ /* type 2 trigger cannot be fired in VU/VS mode */
+ if (env->virt_enabled) {
+ return false;
+ }
+ /* check U/S/M bit against current privilege level */
+ if ((ctrl >> 3) & BIT(env->priv)) {
+ return true;
+ }
+ break;
+ case TRIGGER_TYPE_AD_MATCH6:
+ if (env->virt_enabled) {
+ /* check VU/VS bit against current privilege level */
+ if ((ctrl >> 23) & BIT(env->priv)) {
+ return true;
+ }
+ } else {
+ /* check U/S/M bit against current privilege level */
+ if ((ctrl >> 3) & BIT(env->priv)) {
+ return true;
+ }
+ }
+ break;
+ case TRIGGER_TYPE_INST_CNT:
+ if (env->virt_enabled) {
+ /* check VU/VS bit against current privilege level */
+ if ((ctrl >> 25) & BIT(env->priv)) {
+ return true;
+ }
+ } else {
+ /* check U/S/M bit against current privilege level */
+ if ((ctrl >> 6) & BIT(env->priv)) {
+ return true;
+ }
+ }
+ break;
+ case TRIGGER_TYPE_INT:
+ case TRIGGER_TYPE_EXCP:
+ case TRIGGER_TYPE_EXT_SRC:
+ qemu_log_mask(LOG_UNIMP, "trigger type: %d is not supported\n", type);
+ break;
+ case TRIGGER_TYPE_NO_EXIST:
+ case TRIGGER_TYPE_UNAVAIL:
+ qemu_log_mask(LOG_GUEST_ERROR, "trigger type: %d does not exist\n",
+ type);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ return false;
+}
+
+/* Common matching conditions for all types of the triggers. */
+static bool trigger_common_match(CPURISCVState *env, trigger_type_t type,
+ int trigger_index)
+{
+ return trigger_priv_match(env, type, trigger_index);
+}
+
/* type 2 trigger */
static uint32_t type2_breakpoint_size(CPURISCVState *env, target_ulong ctrl)
@@ -554,7 +624,7 @@ void helper_itrigger_match(CPURISCVState *env)
if (get_trigger_type(env, i) != TRIGGER_TYPE_INST_CNT) {
continue;
}
- if (check_itrigger_priv(env, i)) {
+ if (!trigger_common_match(env, TRIGGER_TYPE_INST_CNT, i)) {
continue;
}
count = itrigger_get_count(env, i);
@@ -785,22 +855,18 @@ bool riscv_cpu_debug_check_breakpoint(CPUState *cs)
for (i = 0; i < RV_MAX_TRIGGERS; i++) {
trigger_type = get_trigger_type(env, i);
+ if (!trigger_common_match(env, trigger_type, i)) {
+ continue;
+ }
+
switch (trigger_type) {
case TRIGGER_TYPE_AD_MATCH:
- /* type 2 trigger cannot be fired in VU/VS mode */
- if (env->virt_enabled) {
- return false;
- }
-
ctrl = env->tdata1[i];
pc = env->tdata2[i];
if ((ctrl & TYPE2_EXEC) && (bp->pc == pc)) {
- /* check U/S/M bit against current privilege level */
- if ((ctrl >> 3) & BIT(env->priv)) {
- env->badaddr = pc;
- return true;
- }
+ env->badaddr = pc;
+ return true;
}
break;
case TRIGGER_TYPE_AD_MATCH6:
@@ -808,19 +874,8 @@ bool riscv_cpu_debug_check_breakpoint(CPUState *cs)
pc = env->tdata2[i];
if ((ctrl & TYPE6_EXEC) && (bp->pc == pc)) {
- if (env->virt_enabled) {
- /* check VU/VS bit against current privilege level */
- if ((ctrl >> 23) & BIT(env->priv)) {
- env->badaddr = pc;
- return true;
- }
- } else {
- /* check U/S/M bit against current privilege level */
- if ((ctrl >> 3) & BIT(env->priv)) {
- env->badaddr = pc;
- return true;
- }
- }
+ env->badaddr = pc;
+ return true;
}
break;
default:
@@ -846,13 +901,12 @@ bool riscv_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp)
for (i = 0; i < RV_MAX_TRIGGERS; i++) {
trigger_type = get_trigger_type(env, i);
+ if (!trigger_common_match(env, trigger_type, i)) {
+ continue;
+ }
+
switch (trigger_type) {
case TRIGGER_TYPE_AD_MATCH:
- /* type 2 trigger cannot be fired in VU/VS mode */
- if (env->virt_enabled) {
- return false;
- }
-
ctrl = env->tdata1[i];
addr = env->tdata2[i];
flags = 0;
@@ -865,10 +919,7 @@ bool riscv_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp)
}
if ((wp->flags & flags) && (wp->vaddr == addr)) {
- /* check U/S/M bit against current privilege level */
- if ((ctrl >> 3) & BIT(env->priv)) {
- return true;
- }
+ return true;
}
break;
case TRIGGER_TYPE_AD_MATCH6:
@@ -884,17 +935,7 @@ bool riscv_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp)
}
if ((wp->flags & flags) && (wp->vaddr == addr)) {
- if (env->virt_enabled) {
- /* check VU/VS bit against current privilege level */
- if ((ctrl >> 23) & BIT(env->priv)) {
- return true;
- }
- } else {
- /* check U/S/M bit against current privilege level */
- if ((ctrl >> 3) & BIT(env->priv)) {
- return true;
- }
- }
+ return true;
}
break;
default:
diff --git a/target/riscv/fpu_helper.c b/target/riscv/fpu_helper.c
index 871a70a..91b1a56 100644
--- a/target/riscv/fpu_helper.c
+++ b/target/riscv/fpu_helper.c
@@ -676,7 +676,7 @@ uint64_t helper_fround_h(CPURISCVState *env, uint64_t rs1)
uint64_t helper_froundnx_h(CPURISCVState *env, uint64_t rs1)
{
- float16 frs1 = check_nanbox_s(env, rs1);
+ float16 frs1 = check_nanbox_h(env, rs1);
frs1 = float16_round_to_int(frs1, &env->fp_status);
return nanbox_h(env, frs1);
}
diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c
index 235e2cd..1047961 100644
--- a/target/riscv/kvm/kvm-cpu.c
+++ b/target/riscv/kvm/kvm-cpu.c
@@ -1555,6 +1555,21 @@ static int kvm_riscv_handle_csr(CPUState *cs, struct kvm_run *run)
return ret;
}
+static bool kvm_riscv_handle_debug(CPUState *cs)
+{
+ RISCVCPU *cpu = RISCV_CPU(cs);
+ CPURISCVState *env = &cpu->env;
+
+ /* Ensure PC is synchronised */
+ kvm_cpu_synchronize_state(cs);
+
+ if (kvm_find_sw_breakpoint(cs, env->pc)) {
+ return true;
+ }
+
+ return false;
+}
+
int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
int ret = 0;
@@ -1565,6 +1580,11 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
case KVM_EXIT_RISCV_CSR:
ret = kvm_riscv_handle_csr(cs, run);
break;
+ case KVM_EXIT_DEBUG:
+ if (kvm_riscv_handle_debug(cs)) {
+ ret = EXCP_DEBUG;
+ }
+ break;
default:
qemu_log_mask(LOG_UNIMP, "%s: un-handled exit reason %d\n",
__func__, run->exit_reason);
@@ -1969,3 +1989,72 @@ static const TypeInfo riscv_kvm_cpu_type_infos[] = {
};
DEFINE_TYPES(riscv_kvm_cpu_type_infos)
+
+static const uint32_t ebreak_insn = 0x00100073;
+static const uint16_t c_ebreak_insn = 0x9002;
+
+int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
+{
+ if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 2, 0)) {
+ return -EINVAL;
+ }
+
+ if ((bp->saved_insn & 0x3) == 0x3) {
+ if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 0)
+ || cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&ebreak_insn, 4, 1)) {
+ return -EINVAL;
+ }
+ } else {
+ if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&c_ebreak_insn, 2, 1)) {
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
+{
+ uint32_t ebreak;
+ uint16_t c_ebreak;
+
+ if ((bp->saved_insn & 0x3) == 0x3) {
+ if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&ebreak, 4, 0) ||
+ ebreak != ebreak_insn ||
+ cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 1)) {
+ return -EINVAL;
+ }
+ } else {
+ if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&c_ebreak, 2, 0) ||
+ c_ebreak != c_ebreak_insn ||
+ cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 2, 1)) {
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+int kvm_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type)
+{
+ /* TODO: To be implemented later. */
+ return -EINVAL;
+}
+
+int kvm_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type)
+{
+ /* TODO: To be implemented later. */
+ return -EINVAL;
+}
+
+void kvm_arch_remove_all_hw_breakpoints(void)
+{
+ /* TODO: To be implemented later. */
+}
+
+void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
+{
+ if (kvm_sw_breakpoints_active(cs)) {
+ dbg->control |= KVM_GUESTDBG_ENABLE;
+ }
+}
diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c
index 683f604..ae25686 100644
--- a/target/riscv/tcg/tcg-cpu.c
+++ b/target/riscv/tcg/tcg-cpu.c
@@ -31,11 +31,17 @@
#include "hw/core/accel-cpu.h"
#include "hw/core/tcg-cpu-ops.h"
#include "tcg/tcg.h"
+#ifndef CONFIG_USER_ONLY
+#include "hw/boards.h"
+#endif
/* Hash that stores user set extensions */
static GHashTable *multi_ext_user_opts;
static GHashTable *misa_ext_user_opts;
+static GHashTable *multi_ext_implied_rules;
+static GHashTable *misa_ext_implied_rules;
+
static bool cpu_cfg_ext_is_user_set(uint32_t ext_offset)
{
return g_hash_table_contains(multi_ext_user_opts,
@@ -76,16 +82,11 @@ static void riscv_cpu_write_misa_bit(RISCVCPU *cpu, uint32_t bit,
static const char *cpu_priv_ver_to_str(int priv_ver)
{
- switch (priv_ver) {
- case PRIV_VERSION_1_10_0:
- return "v1.10.0";
- case PRIV_VERSION_1_11_0:
- return "v1.11.0";
- case PRIV_VERSION_1_12_0:
- return "v1.12.0";
- }
+ const char *priv_spec_str = priv_spec_to_str(priv_ver);
- g_assert_not_reached();
+ g_assert(priv_spec_str);
+
+ return priv_spec_str;
}
static void riscv_cpu_synchronize_from_tb(CPUState *cs,
@@ -323,6 +324,10 @@ static void riscv_cpu_update_named_features(RISCVCPU *cpu)
cpu->cfg.has_priv_1_12 = true;
}
+ if (cpu->env.priv_ver >= PRIV_VERSION_1_13_0) {
+ cpu->cfg.has_priv_1_13 = true;
+ }
+
/* zic64b is 1.12 or later */
cpu->cfg.ext_zic64b = cpu->cfg.cbom_blocksize == 64 &&
cpu->cfg.cbop_blocksize == 64 &&
@@ -466,10 +471,6 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
return;
}
- if (cpu->cfg.ext_zfh) {
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zfhmin), true);
- }
-
if (cpu->cfg.ext_zfhmin && !riscv_has_ext(env, RVF)) {
error_setg(errp, "Zfh/Zfhmin extensions require F extension");
return;
@@ -491,9 +492,6 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
error_propagate(errp, local_err);
return;
}
-
- /* The V vector extension depends on the Zve64d extension */
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zve64d), true);
}
/* The Zve64d extension depends on the Zve64f extension */
@@ -502,18 +500,6 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
error_setg(errp, "Zve64d/V extensions require D extension");
return;
}
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zve64f), true);
- }
-
- /* The Zve64f extension depends on the Zve64x and Zve32f extensions */
- if (cpu->cfg.ext_zve64f) {
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zve64x), true);
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zve32f), true);
- }
-
- /* The Zve64x extension depends on the Zve32x extension */
- if (cpu->cfg.ext_zve64x) {
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zve32x), true);
}
/* The Zve32f extension depends on the Zve32x extension */
@@ -522,11 +508,6 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
error_setg(errp, "Zve32f/Zve64f extensions require F extension");
return;
}
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zve32x), true);
- }
-
- if (cpu->cfg.ext_zvfh) {
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvfhmin), true);
}
if (cpu->cfg.ext_zvfhmin && !cpu->cfg.ext_zve32f) {
@@ -549,11 +530,6 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
return;
}
- /* Set the ISA extensions, checks should have happened above */
- if (cpu->cfg.ext_zhinx) {
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zca), true);
- }
-
if ((cpu->cfg.ext_zdinx || cpu->cfg.ext_zhinxmin) && !cpu->cfg.ext_zfinx) {
error_setg(errp, "Zdinx/Zhinx/Zhinxmin extensions require Zfinx");
return;
@@ -571,27 +547,6 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
}
}
- if (cpu->cfg.ext_zce) {
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zca), true);
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcb), true);
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcmp), true);
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcmt), true);
- if (riscv_has_ext(env, RVF) && mcc->misa_mxl_max == MXL_RV32) {
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcf), true);
- }
- }
-
- /* zca, zcd and zcf has a PRIV 1.12.0 restriction */
- if (riscv_has_ext(env, RVC) && env->priv_ver >= PRIV_VERSION_1_12_0) {
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zca), true);
- if (riscv_has_ext(env, RVF) && mcc->misa_mxl_max == MXL_RV32) {
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcf), true);
- }
- if (riscv_has_ext(env, RVD)) {
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcd), true);
- }
- }
-
if (mcc->misa_mxl_max != MXL_RV32 && cpu->cfg.ext_zcf) {
error_setg(errp, "Zcf extension is only relevant to RV32");
return;
@@ -625,48 +580,6 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
return;
}
- /*
- * Shorthand vector crypto extensions
- */
- if (cpu->cfg.ext_zvknc) {
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkn), true);
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvbc), true);
- }
-
- if (cpu->cfg.ext_zvkng) {
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkn), true);
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkg), true);
- }
-
- if (cpu->cfg.ext_zvkn) {
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkned), true);
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvknhb), true);
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkb), true);
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkt), true);
- }
-
- if (cpu->cfg.ext_zvksc) {
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvks), true);
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvbc), true);
- }
-
- if (cpu->cfg.ext_zvksg) {
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvks), true);
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkg), true);
- }
-
- if (cpu->cfg.ext_zvks) {
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvksed), true);
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvksh), true);
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkb), true);
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkt), true);
- }
-
- if (cpu->cfg.ext_zvkt) {
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvbb), true);
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvbc), true);
- }
-
if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkb || cpu->cfg.ext_zvkg ||
cpu->cfg.ext_zvkned || cpu->cfg.ext_zvknha || cpu->cfg.ext_zvksed ||
cpu->cfg.ext_zvksh) && !cpu->cfg.ext_zve32x) {
@@ -682,29 +595,6 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
return;
}
- if (cpu->cfg.ext_zk) {
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zkn), true);
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zkr), true);
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zkt), true);
- }
-
- if (cpu->cfg.ext_zkn) {
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zbkb), true);
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zbkc), true);
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zbkx), true);
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zkne), true);
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zknd), true);
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zknh), true);
- }
-
- if (cpu->cfg.ext_zks) {
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zbkb), true);
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zbkc), true);
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zbkx), true);
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zksed), true);
- cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zksh), true);
- }
-
if (cpu->cfg.ext_zicntr && !cpu->cfg.ext_zicsr) {
if (cpu_cfg_ext_is_user_set(CPU_CFG_OFFSET(ext_zicntr))) {
error_setg(errp, "zicntr requires zicsr");
@@ -833,11 +723,151 @@ static void riscv_cpu_validate_profiles(RISCVCPU *cpu)
}
}
+static void riscv_cpu_init_implied_exts_rules(void)
+{
+ RISCVCPUImpliedExtsRule *rule;
+#ifndef CONFIG_USER_ONLY
+ MachineState *ms = MACHINE(qdev_get_machine());
+#endif
+ static bool initialized;
+ int i;
+
+ /* Implied rules only need to be initialized once. */
+ if (initialized) {
+ return;
+ }
+
+ for (i = 0; (rule = riscv_misa_ext_implied_rules[i]); i++) {
+#ifndef CONFIG_USER_ONLY
+ rule->enabled = bitmap_new(ms->smp.cpus);
+#endif
+ g_hash_table_insert(misa_ext_implied_rules,
+ GUINT_TO_POINTER(rule->ext), (gpointer)rule);
+ }
+
+ for (i = 0; (rule = riscv_multi_ext_implied_rules[i]); i++) {
+#ifndef CONFIG_USER_ONLY
+ rule->enabled = bitmap_new(ms->smp.cpus);
+#endif
+ g_hash_table_insert(multi_ext_implied_rules,
+ GUINT_TO_POINTER(rule->ext), (gpointer)rule);
+ }
+
+ initialized = true;
+}
+
+static void cpu_enable_implied_rule(RISCVCPU *cpu,
+ RISCVCPUImpliedExtsRule *rule)
+{
+ CPURISCVState *env = &cpu->env;
+ RISCVCPUImpliedExtsRule *ir;
+ bool enabled = false;
+ int i;
+
+#ifndef CONFIG_USER_ONLY
+ enabled = test_bit(cpu->env.mhartid, rule->enabled);
+#endif
+
+ if (!enabled) {
+ /* Enable the implied MISAs. */
+ if (rule->implied_misa_exts) {
+ riscv_cpu_set_misa_ext(env,
+ env->misa_ext | rule->implied_misa_exts);
+
+ for (i = 0; misa_bits[i] != 0; i++) {
+ if (rule->implied_misa_exts & misa_bits[i]) {
+ ir = g_hash_table_lookup(misa_ext_implied_rules,
+ GUINT_TO_POINTER(misa_bits[i]));
+
+ if (ir) {
+ cpu_enable_implied_rule(cpu, ir);
+ }
+ }
+ }
+ }
+
+ /* Enable the implied extensions. */
+ for (i = 0;
+ rule->implied_multi_exts[i] != RISCV_IMPLIED_EXTS_RULE_END; i++) {
+ cpu_cfg_ext_auto_update(cpu, rule->implied_multi_exts[i], true);
+
+ ir = g_hash_table_lookup(multi_ext_implied_rules,
+ GUINT_TO_POINTER(
+ rule->implied_multi_exts[i]));
+
+ if (ir) {
+ cpu_enable_implied_rule(cpu, ir);
+ }
+ }
+
+#ifndef CONFIG_USER_ONLY
+ bitmap_set(rule->enabled, cpu->env.mhartid, 1);
+#endif
+ }
+}
+
+/* Zc extension has special implied rules that need to be handled separately. */
+static void cpu_enable_zc_implied_rules(RISCVCPU *cpu)
+{
+ RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(cpu);
+ CPURISCVState *env = &cpu->env;
+
+ if (cpu->cfg.ext_zce) {
+ cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zca), true);
+ cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcb), true);
+ cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcmp), true);
+ cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcmt), true);
+
+ if (riscv_has_ext(env, RVF) && mcc->misa_mxl_max == MXL_RV32) {
+ cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcf), true);
+ }
+ }
+
+ /* Zca, Zcd and Zcf have a PRIV 1.12.0 restriction */
+ if (riscv_has_ext(env, RVC) && env->priv_ver >= PRIV_VERSION_1_12_0) {
+ cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zca), true);
+
+ if (riscv_has_ext(env, RVF) && mcc->misa_mxl_max == MXL_RV32) {
+ cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcf), true);
+ }
+
+ if (riscv_has_ext(env, RVD)) {
+ cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcd), true);
+ }
+ }
+}
+
+static void riscv_cpu_enable_implied_rules(RISCVCPU *cpu)
+{
+ RISCVCPUImpliedExtsRule *rule;
+ int i;
+
+ /* Enable the implied extensions for Zc. */
+ cpu_enable_zc_implied_rules(cpu);
+
+ /* Enable the implied MISAs. */
+ for (i = 0; (rule = riscv_misa_ext_implied_rules[i]); i++) {
+ if (riscv_has_ext(&cpu->env, rule->ext)) {
+ cpu_enable_implied_rule(cpu, rule);
+ }
+ }
+
+ /* Enable the implied extensions. */
+ for (i = 0; (rule = riscv_multi_ext_implied_rules[i]); i++) {
+ if (isa_ext_is_enabled(cpu, rule->ext)) {
+ cpu_enable_implied_rule(cpu, rule);
+ }
+ }
+}
+
void riscv_tcg_cpu_finalize_features(RISCVCPU *cpu, Error **errp)
{
CPURISCVState *env = &cpu->env;
Error *local_err = NULL;
+ riscv_cpu_init_implied_exts_rules();
+ riscv_cpu_enable_implied_rules(cpu);
+
riscv_cpu_validate_misa_priv(env, &local_err);
if (local_err != NULL) {
error_propagate(errp, local_err);
@@ -1343,6 +1373,15 @@ static void riscv_tcg_cpu_instance_init(CPUState *cs)
misa_ext_user_opts = g_hash_table_new(NULL, g_direct_equal);
multi_ext_user_opts = g_hash_table_new(NULL, g_direct_equal);
+
+ if (!misa_ext_implied_rules) {
+ misa_ext_implied_rules = g_hash_table_new(NULL, g_direct_equal);
+ }
+
+ if (!multi_ext_implied_rules) {
+ multi_ext_implied_rules = g_hash_table_new(NULL, g_direct_equal);
+ }
+
riscv_cpu_add_user_properties(obj);
if (riscv_cpu_has_max_extensions(obj)) {
diff --git a/target/s390x/Kconfig b/target/s390x/Kconfig
index d886be4..8a95f2b 100644
--- a/target/s390x/Kconfig
+++ b/target/s390x/Kconfig
@@ -2,3 +2,8 @@ config S390X
bool
select PCI
select S390_FLIC
+
+config S390X_LEGACY_CPUS
+ bool
+ default y
+ depends on S390X
diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c
index 7e8a1b4..029d91d 100644
--- a/target/s390x/arch_dump.c
+++ b/target/s390x/arch_dump.c
@@ -102,7 +102,7 @@ static void s390x_write_elf64_prstatus(Note *note, S390CPU *cpu, int id)
regs->acrs[i] = cpu_to_be32(cpu->env.aregs[i]);
regs->gprs[i] = cpu_to_be64(cpu->env.regs[i]);
}
- note->contents.prstatus.pid = id;
+ note->contents.prstatus.pid = cpu_to_be32(id);
}
static void s390x_write_elf64_fpregset(Note *note, S390CPU *cpu, int id)
diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index efb508c..a27f4b6 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -25,6 +25,7 @@
#ifndef CONFIG_USER_ONLY
#include "sysemu/sysemu.h"
#include "target/s390x/kvm/pv.h"
+#include CONFIG_DEVICES
#endif
#define CPUDEF_INIT(_type, _gen, _ec_ga, _mha_pow, _hmfai, _name, _desc) \
@@ -47,6 +48,13 @@
* generation 15 one base feature and one optional feature have been deprecated.
*/
static S390CPUDef s390_cpu_defs[] = {
+ /*
+ * Linux requires at least z10 nowadays, and IBM only supports recent CPUs
+ * (see https://www.ibm.com/support/pages/ibm-mainframe-life-cycle-history),
+ * so we consider older CPUs as legacy that can optionally be disabled via
+ * the CONFIG_S390X_LEGACY_CPUS config switch.
+ */
+#if defined(CONFIG_S390X_LEGACY_CPUS) || defined(CONFIG_USER_ONLY)
CPUDEF_INIT(0x2064, 7, 1, 38, 0x00000000U, "z900", "IBM zSeries 900 GA1"),
CPUDEF_INIT(0x2064, 7, 2, 38, 0x00000000U, "z900.2", "IBM zSeries 900 GA2"),
CPUDEF_INIT(0x2064, 7, 3, 38, 0x00000000U, "z900.3", "IBM zSeries 900 GA3"),
@@ -64,6 +72,7 @@ static S390CPUDef s390_cpu_defs[] = {
CPUDEF_INIT(0x2096, 9, 2, 40, 0x00000000U, "z9BC", "IBM System z9 BC GA1"),
CPUDEF_INIT(0x2094, 9, 3, 40, 0x00000000U, "z9EC.3", "IBM System z9 EC GA3"),
CPUDEF_INIT(0x2096, 9, 3, 40, 0x00000000U, "z9BC.2", "IBM System z9 BC GA2"),
+#endif
CPUDEF_INIT(0x2097, 10, 1, 43, 0x00000000U, "z10EC", "IBM System z10 EC GA1"),
CPUDEF_INIT(0x2097, 10, 2, 43, 0x00000000U, "z10EC.2", "IBM System z10 EC GA2"),
CPUDEF_INIT(0x2098, 10, 2, 43, 0x00000000U, "z10BC", "IBM System z10 BC GA1"),
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
index 1b494ec..94181d9 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -40,7 +40,7 @@
#include "sysemu/hw_accel.h"
#include "sysemu/runstate.h"
#include "sysemu/device_tree.h"
-#include "exec/gdbstub.h"
+#include "gdbstub/enums.h"
#include "exec/ram_addr.h"
#include "trace.h"
#include "hw/s390x/s390-pci-inst.h"
diff --git a/tests/qtest/fuzz/qos_fuzz.c b/tests/qtest/fuzz/qos_fuzz.c
index b71e945..d3839bf 100644
--- a/tests/qtest/fuzz/qos_fuzz.c
+++ b/tests/qtest/fuzz/qos_fuzz.c
@@ -180,6 +180,7 @@ static void walk_path(QOSGraphNode *orig_path, int len)
fuzz_path_vec = path_vec;
} else {
+ g_string_free(cmd_line, true);
g_free(path_vec);
}
diff --git a/util/qemu-timer.c b/util/qemu-timer.c
index 6a0de33..213114b 100644
--- a/util/qemu-timer.c
+++ b/util/qemu-timer.c
@@ -645,6 +645,11 @@ int64_t qemu_clock_get_ns(QEMUClockType type)
}
}
+static void qemu_virtual_clock_set_ns(int64_t time)
+{
+ return cpus_set_virtual_clock(time);
+}
+
void init_clocks(QEMUTimerListNotifyCB *notify_cb)
{
QEMUClockType type;
@@ -675,3 +680,24 @@ bool qemu_clock_run_all_timers(void)
return progress;
}
+
+int64_t qemu_clock_advance_virtual_time(int64_t dest)
+{
+ int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+ AioContext *aio_context;
+ aio_context = qemu_get_aio_context();
+ while (clock < dest) {
+ int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
+ QEMU_TIMER_ATTR_ALL);
+ int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
+
+ qemu_virtual_clock_set_ns(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + warp);
+
+ qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
+ timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
+ clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+ }
+ qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
+
+ return clock;
+}