aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS1
-rw-r--r--contrib/plugins/execlog.c1
-rw-r--r--contrib/plugins/meson.build3
-rw-r--r--contrib/plugins/uftrace.c878
-rwxr-xr-xcontrib/plugins/uftrace_symbols.py152
-rw-r--r--docs/about/emulation.rst199
-rw-r--r--docs/devel/build-system.rst2
-rw-r--r--docs/devel/code-provenance.rst33
-rw-r--r--docs/devel/memory.rst17
-rw-r--r--docs/devel/rust.rst2
-rw-r--r--docs/system/devices/igb.rst2
-rw-r--r--hw/core/register.c1
-rw-r--r--hw/hyperv/hv-balloon.c12
-rw-r--r--hw/intc/arm_gicv3_cpuif.c10
-rw-r--r--hw/ppc/spapr_pci_vfio.c14
-rw-r--r--hw/s390x/s390-pci-vfio.c16
-rw-r--r--hw/sd/sdhci.c4
-rw-r--r--hw/usb/hcd-uhci.c10
-rw-r--r--hw/vfio-user/container.c18
-rw-r--r--hw/vfio-user/container.h4
-rw-r--r--hw/vfio-user/pci.c35
-rw-r--r--hw/vfio/container-base.c347
-rw-r--r--hw/vfio/container-legacy.c1277
-rw-r--r--hw/vfio/container.c1325
-rw-r--r--hw/vfio/cpr-iommufd.c4
-rw-r--r--hw/vfio/cpr-legacy.c43
-rw-r--r--hw/vfio/device.c4
-rw-r--r--hw/vfio/iommufd.c48
-rw-r--r--hw/vfio/listener.c74
-rw-r--r--hw/vfio/meson.build2
-rw-r--r--hw/vfio/pci-quirks.c9
-rw-r--r--hw/vfio/pci.c72
-rw-r--r--hw/vfio/pci.h2
-rw-r--r--hw/vfio/region.c3
-rw-r--r--hw/vfio/spapr.c52
-rw-r--r--hw/vfio/types.h4
-rw-r--r--hw/vfio/vfio-iommufd.h9
-rw-r--r--hw/vfio/vfio-listener.h4
-rw-r--r--hw/xen/xen_pt_msi.c11
-rw-r--r--include/hw/vfio/vfio-container-base.h279
-rw-r--r--include/hw/vfio/vfio-container-legacy.h39
-rw-r--r--include/hw/vfio/vfio-container.h286
-rw-r--r--include/hw/vfio/vfio-cpr.h15
-rw-r--r--include/hw/vfio/vfio-device.h12
-rw-r--r--include/semihosting/common-semi.h6
-rw-r--r--include/semihosting/guestfd.h7
-rw-r--r--include/semihosting/semihost.h2
-rw-r--r--include/semihosting/syscalls.h30
-rw-r--r--linux-user/arm/target_proc.h2
-rw-r--r--linux-user/strace.c2
-rw-r--r--meson.build1
-rw-r--r--rust/Cargo.lock11
-rw-r--r--rust/bql/meson.build1
-rw-r--r--rust/common/meson.build4
-rw-r--r--rust/common/src/uninit.rs4
-rw-r--r--rust/hw/core/src/qdev.rs105
-rw-r--r--rust/hw/timer/hpet/src/device.rs55
-rw-r--r--rust/meson.build2
-rw-r--r--rust/migration/meson.build1
-rw-r--r--rust/migration/src/vmstate.rs2
-rw-r--r--rust/qemu-macros/Cargo.toml1
-rw-r--r--rust/qemu-macros/meson.build1
-rw-r--r--rust/qemu-macros/src/lib.rs108
-rw-r--r--rust/qemu-macros/src/tests.rs113
-rw-r--r--rust/qom/meson.build1
-rw-r--r--rust/util/meson.build5
-rwxr-xr-xscripts/archive-source.sh2
-rwxr-xr-xscripts/checkpatch.pl3
-rwxr-xr-xscripts/ci/gitlab-failure-analysis117
-rwxr-xr-xscripts/make-release2
-rw-r--r--semihosting/arm-compat-semi-stub.c19
-rw-r--r--semihosting/arm-compat-semi.c63
-rw-r--r--semihosting/guestfd.c26
-rw-r--r--semihosting/meson.build18
-rw-r--r--semihosting/syscalls.c109
-rw-r--r--subprojects/.gitignore6
-rw-r--r--subprojects/attrs-0.2-rs.wrap7
-rw-r--r--subprojects/packagefiles/attrs-0.2-rs/meson.build33
-rw-r--r--target/arm/arm-powerctl.c26
-rw-r--r--target/arm/common-semi-target.c (renamed from target/arm/common-semi-target.h)22
-rw-r--r--target/arm/cpregs.h111
-rw-r--r--target/arm/cpu-features.h415
-rw-r--r--target/arm/cpu-sysregs.h.inc1
-rw-r--r--target/arm/cpu.c16
-rw-r--r--target/arm/cpu.h413
-rw-r--r--target/arm/gdbstub.c14
-rw-r--r--target/arm/helper.c933
-rw-r--r--target/arm/hvf/hvf.c240
-rw-r--r--target/arm/hvf/sysreg.c.inc147
-rw-r--r--target/arm/internals.h3
-rw-r--r--target/arm/kvm-consts.h14
-rw-r--r--target/arm/kvm.c12
-rw-r--r--target/arm/meson.build4
-rw-r--r--target/arm/tcg/hflags.c8
-rw-r--r--target/arm/tcg/translate-a64.c47
-rw-r--r--target/arm/tcg/translate.h2
-rw-r--r--target/arm/trace-events10
-rw-r--r--target/riscv/common-semi-target.c (renamed from target/riscv/common-semi-target.h)23
-rw-r--r--target/riscv/meson.build4
-rw-r--r--tests/functional/x86_64/meson.build1
-rwxr-xr-xtests/functional/x86_64/test_vfio_user_client.py201
101 files changed, 5237 insertions, 3634 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 24b71a4..7d134a8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4318,6 +4318,7 @@ F: docs/system/devices/vfio-user.rst
F: hw/vfio-user/*
F: include/hw/vfio-user/*
F: subprojects/libvfio-user
+F: tests/functional/x86_64/test_vfio_user_client.py
EBPF:
M: Jason Wang <jasowang@redhat.com>
diff --git a/contrib/plugins/execlog.c b/contrib/plugins/execlog.c
index 06ec76d..811f320 100644
--- a/contrib/plugins/execlog.c
+++ b/contrib/plugins/execlog.c
@@ -95,6 +95,7 @@ static void insn_check_regs(CPU *cpu)
g_byte_array_set_size(reg->new, 0);
sz = qemu_plugin_read_register(reg->handle, reg->new);
+ g_assert(sz > 0);
g_assert(sz == reg->last->len);
if (memcmp(reg->last->data, reg->new->data, sz)) {
diff --git a/contrib/plugins/meson.build b/contrib/plugins/meson.build
index 1876bc7..7eb3629 100644
--- a/contrib/plugins/meson.build
+++ b/contrib/plugins/meson.build
@@ -1,5 +1,6 @@
contrib_plugins = ['bbv', 'cache', 'cflow', 'drcov', 'execlog', 'hotblocks',
- 'hotpages', 'howvec', 'hwprofile', 'ips', 'stoptrigger']
+ 'hotpages', 'howvec', 'hwprofile', 'ips', 'stoptrigger',
+ 'uftrace']
if host_os != 'windows'
# lockstep uses socket.h
contrib_plugins += 'lockstep'
diff --git a/contrib/plugins/uftrace.c b/contrib/plugins/uftrace.c
new file mode 100644
index 0000000..b7d6124
--- /dev/null
+++ b/contrib/plugins/uftrace.c
@@ -0,0 +1,878 @@
+/*
+ * Copyright (C) 2025, Pierrick Bouvier <pierrick.bouvier@linaro.org>
+ *
+ * Generates a trace compatible with uftrace (similar to uftrace record).
+ * https://github.com/namhyung/uftrace
+ *
+ * See docs/about/emulation.rst|Uftrace for details and examples.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <qemu-plugin.h>
+#include <glib.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <time.h>
+#include <unistd.h>
+
+#define MiB (INT64_C(1) << 20)
+#define NANOSECONDS_PER_SECOND 1000000000LL
+#define TRACE_FLUSH_SIZE (32 * MiB)
+#define TRACE_ID_SCALE 100
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
+
+typedef struct {
+ GArray *s;
+} Callstack;
+
+typedef struct {
+ uint64_t pc;
+ uint64_t frame_pointer;
+} CallstackEntry;
+
+typedef struct {
+ GArray *t;
+ GString *path;
+ GString *name;
+ uint32_t id;
+} Trace;
+
+typedef struct Cpu Cpu;
+
+typedef struct {
+ void (*init)(Cpu *cpu);
+ void (*end)(Cpu *cpu);
+ uint64_t (*get_frame_pointer)(Cpu *cpu);
+ uint8_t (*get_privilege_level)(Cpu *cpu);
+ uint8_t (*num_privilege_levels)(void);
+ const char *(*get_privilege_level_name)(uint8_t pl);
+ bool (*does_insn_modify_frame_pointer)(const char *disas);
+} CpuOps;
+
+typedef struct Cpu {
+ Trace *trace;
+ Callstack *cs;
+ uint8_t privilege_level;
+ GArray *traces; /* Trace *traces [] */
+ GByteArray *buf;
+ CpuOps ops;
+ void *arch;
+} Cpu;
+
+typedef enum {
+ AARCH64_EL0_SECURE,
+ AARCH64_EL0_NONSECURE,
+ AARCH64_EL0_REALM,
+ AARCH64_EL1_SECURE,
+ AARCH64_EL1_NONSECURE,
+ AARCH64_EL1_REALM,
+ AARCH64_EL2_SECURE,
+ AARCH64_EL2_NONSECURE,
+ AARCH64_EL2_REALM,
+ AARCH64_EL3,
+ AARCH64_PRIVILEGE_LEVEL_MAX,
+} Aarch64PrivilegeLevel;
+
+typedef struct {
+ struct qemu_plugin_register *reg_fp;
+ struct qemu_plugin_register *reg_cpsr;
+ struct qemu_plugin_register *reg_scr_el3;
+} Aarch64Cpu;
+
+typedef enum {
+ X64_RING0,
+ X64_RING1,
+ X64_RING2,
+ X64_RING3,
+ X64_REAL_MODE,
+ X64_PRIVILEGE_LEVEL_MAX,
+} X64PrivilegeLevel;
+
+typedef struct {
+ struct qemu_plugin_register *reg_rbp;
+ struct qemu_plugin_register *reg_cs;
+ struct qemu_plugin_register *reg_cr0;
+} X64Cpu;
+
+typedef struct {
+ uint64_t timestamp;
+ uint64_t data;
+} UftraceEntry;
+
+typedef enum {
+ UFTRACE_ENTRY,
+ UFTRACE_EXIT,
+ UFTRACE_LOST,
+ UFTRACE_EVENT,
+} UftraceRecordType;
+
+static struct qemu_plugin_scoreboard *score;
+static bool trace_privilege_level;
+static CpuOps arch_ops;
+
+static uint64_t gettime_ns(void)
+{
+#ifdef _WIN32
+ /*
+ * On Windows, timespec_get is available only with UCRT, but not with
+ * MinGW64 environment. Simplify by using only gettimeofday on this
+ * platform. This may result in a precision loss.
+ */
+ struct timeval tv;
+ gettimeofday(&tv, NULL);
+ uint64_t now_ns = tv.tv_sec * NANOSECONDS_PER_SECOND + tv.tv_usec * 1000;
+#else
+ /* We need nanosecond precision for short lived functions. */
+ struct timespec ts;
+ timespec_get(&ts, TIME_UTC);
+ uint64_t now_ns = ts.tv_sec * NANOSECONDS_PER_SECOND + ts.tv_nsec;
+#endif
+ return now_ns;
+}
+
/*
 * Write ./uftrace.data/sid-0.map, the memory map uftrace uses to resolve
 * addresses to binaries.
 */
static void uftrace_write_map(bool system_emulation)
{
    const char *path = "./uftrace.data/sid-0.map";

    /*
     * In system emulation, never clobber an existing map: a custom one may
     * already have been generated by uftrace_symbols.py.
     */
    if (system_emulation && access(path, F_OK) == 0) {
        return;
    }

    FILE *sid_map = fopen(path, "w");
    g_assert(sid_map);

    if (!system_emulation) {
        /* in user mode, copy /proc/self/maps instead */
        FILE *self_map = fopen("/proc/self/maps", "r");
        g_assert(self_map);
        int c;
        while ((c = fgetc(self_map)) != EOF) {
            fputc(c, sid_map);
        }
        fclose(self_map);
    } else {
        fprintf(sid_map,
                "# map stack on highest address possible, to prevent uftrace\n"
                "# from considering any kernel address\n");
        fprintf(sid_map,
                "ffffffffffff-ffffffffffff rw-p 00000000 00:00 0 [stack]\n");
    }
    fclose(sid_map);
}
+
+static void uftrace_write_task(const GArray *traces)
+{
+ FILE *task = fopen("./uftrace.data/task.txt", "w");
+ g_assert(task);
+ for (int i = 0; i < traces->len; ++i) {
+ Trace *t = g_array_index(traces, Trace*, i);
+ fprintf(task, "SESS timestamp=0.0 pid=%"PRIu32" sid=0 exename=\"%s\"\n",
+ t->id, t->name->str);
+ fprintf(task, "TASK timestamp=0.0 tid=%"PRIu32" pid=%"PRIu32"\n",
+ t->id, t->id);
+ }
+ fclose(task);
+}
+
/*
 * Write ./uftrace.data/info: a fixed 40-byte binary header followed by
 * "key:value" text lines. Only the task-related fields are filled in from
 * the collected traces; the other fields are left empty.
 */
static void uftrace_write_info(const GArray *traces)
{
    /* comma-separated list of all trace ids */
    g_autoptr(GString) taskinfo_tids = g_string_new("taskinfo:tids=");
    for (int i = 0; i < traces->len; ++i) {
        Trace *t = g_array_index(traces, Trace*, i);
        const char *delim = i > 0 ? "," : "";
        g_string_append_printf(taskinfo_tids, "%s%"PRIu32, delim, t->id);
    }

    g_autoptr(GString) taskinfo_nr_tid = g_string_new("taskinfo:nr_tid=");
    g_string_append_printf(taskinfo_nr_tid, "%d", traces->len);

    FILE *info = fopen("./uftrace.data/info", "w");
    g_assert(info);
    /*
     * Fixed header bytes, as decoded by uftrace itself:
     * $ uftrace dump --debug
     * uftrace file header: magic = 4674726163652100
     * uftrace file header: version = 4
     * uftrace file header: header size = 40
     * uftrace file header: endian = 1 (little)
     * uftrace file header: class = 2 (64 bit)
     * uftrace file header: features = 0x1263 (PLTHOOK | ...
     * uftrace file header: info = 0x7bff (EXE_NAME | ...
     * <0000000000000000>: 46 74 72 61 63 65 21 00 04 00 00 00 28 00 01 02
     * <0000000000000010>: 63 12 00 00 00 00 00 00 ff 7b 00 00 00 00 00 00
     * <0000000000000020>: 00 04 00 00 00 00 00 00
     */
    const uint8_t header[] = {0x46, 0x74, 0x72, 0x61, 0x63, 0x65, 0x21, 0x00,
                              0x04, 0x00, 0x00, 0x00, 0x28, 0x00, 0x01, 0x02,
                              0x63, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                              0xff, 0x7b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                              0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
    size_t wrote = fwrite(header, sizeof(header), 1, info);
    g_assert(wrote == 1);
    /* NULL-terminated list of text lines; most values deliberately empty */
    const char *info_data[] = {
        "exename:",
        "build_id:0000000000000000000000000000000000000000",
        "exit_status:",
        "cmdline:",
        "cpuinfo:lines=2",
        "cpuinfo:nr_cpus=",
        "cpuinfo:desc=",
        "meminfo:",
        "osinfo:lines=3",
        "osinfo:kernel=",
        "osinfo:hostname=",
        "osinfo:distro=",
        "taskinfo:lines=2",
        taskinfo_nr_tid->str,
        taskinfo_tids->str,
        "usageinfo:lines=6",
        "usageinfo:systime=",
        "usageinfo:usrtime=",
        "usageinfo:ctxsw=",
        "usageinfo:maxrss=",
        "usageinfo:pagefault=",
        "usageinfo:iops=",
        "loadinfo:",
        "record_date:",
        "elapsed_time:",
        "pattern_type:regex",
        "uftrace_version:",
        "utc_offset:",
        0};
    const char **info_data_it = info_data;
    while (*(info_data_it)) {
        fprintf(info, "%s\n", *info_data_it);
        ++info_data_it;
    }
    fclose(info);
}
+
+static Callstack *callstack_new(void)
+{
+ Callstack *cs = g_new0(Callstack, 1);
+ cs->s = g_array_new(false, false, sizeof(CallstackEntry));
+ return cs;
+}
+
+static void callstack_free(Callstack *cs)
+{
+ g_array_free(cs->s, true);
+ cs->s = NULL;
+ g_free(cs);
+}
+
+static size_t callstack_depth(const Callstack *cs)
+{
+ return cs->s->len;
+}
+
+static size_t callstack_empty(const Callstack *cs)
+{
+ return callstack_depth(cs) == 0;
+}
+
+static void callstack_clear(Callstack *cs)
+{
+ g_array_set_size(cs->s, 0);
+}
+
+static const CallstackEntry *callstack_at(const Callstack *cs, size_t depth)
+{
+ g_assert(depth > 0);
+ g_assert(depth <= callstack_depth(cs));
+ return &g_array_index(cs->s, CallstackEntry, depth - 1);
+}
+
+static CallstackEntry callstack_top(const Callstack *cs)
+{
+ if (callstack_depth(cs) >= 1) {
+ return *callstack_at(cs, callstack_depth(cs));
+ }
+ return (CallstackEntry){};
+}
+
+static CallstackEntry callstack_caller(const Callstack *cs)
+{
+ if (callstack_depth(cs) >= 2) {
+ return *callstack_at(cs, callstack_depth(cs) - 1);
+ }
+ return (CallstackEntry){};
+}
+
+static void callstack_push(Callstack *cs, CallstackEntry e)
+{
+ g_array_append_val(cs->s, e);
+}
+
+static CallstackEntry callstack_pop(Callstack *cs)
+{
+ g_assert(!callstack_empty(cs));
+ CallstackEntry e = callstack_top(cs);
+ g_array_set_size(cs->s, callstack_depth(cs) - 1);
+ return e;
+}
+
+static Trace *trace_new(uint32_t id, GString *name)
+{
+ Trace *t = g_new0(Trace, 1);
+ t->t = g_array_new(false, false, sizeof(UftraceEntry));
+ t->path = g_string_new(NULL);
+ g_string_append_printf(t->path, "./uftrace.data/%"PRIu32".dat", id);
+ t->name = g_string_new(name->str);
+ t->id = id;
+ return t;
+}
+
+static void trace_free(Trace *t)
+{
+ g_assert(t->t->len == 0);
+ g_array_free(t->t, true);
+ t->t = NULL;
+ g_string_free(t->path, true);
+ t->path = NULL;
+ g_string_free(t->name, true);
+ t->name = NULL;
+ g_free(t);
+}
+
+static void trace_flush(Trace *t, bool append)
+{
+ int create_dir = g_mkdir_with_parents("./uftrace.data",
+ S_IRWXU | S_IRWXG | S_IRWXO);
+ g_assert(create_dir == 0);
+ FILE *dat = fopen(t->path->str, append ? "a" : "w");
+ g_assert(dat);
+ GArray *data = t->t;
+ if (data->len) {
+ size_t wrote = fwrite(data->data, sizeof(UftraceEntry), data->len, dat);
+ g_assert(wrote == data->len);
+ }
+ fclose(dat);
+ g_array_set_size(data, 0);
+}
+
/*
 * Append one uftrace record to the trace buffer, flushing to disk past
 * TRACE_FLUSH_SIZE. Record layout (see uftrace libmcount/record.c):
 * bits [0:2] record type, [3:5] record magic (5), [6:15] call depth,
 * [16:63] address.
 * NOTE(review): depth is not masked here; a depth above 1023 would spill
 * into the address field - presumably stacks never get that deep, confirm.
 */
static void trace_add_entry(Trace *t, uint64_t timestamp, uint64_t pc,
                            size_t depth, UftraceRecordType type)
{
    /* https://github.com/namhyung/uftrace/blob/v0.18/libmcount/record.c#L909 */
    const uint64_t record_magic = 0x5;
    uint64_t data = type | (record_magic << 3);
    data += depth << 6;
    data += pc << 16;
    UftraceEntry e = {.timestamp = timestamp, .data = data};
    g_array_append_val(t->t, e);
    if (t->t->len * sizeof(UftraceEntry) > TRACE_FLUSH_SIZE) {
        trace_flush(t, true);
    }
}

/* Record entering the function at pc, at the given call depth. */
static void trace_enter_function(Trace *t, uint64_t timestamp,
                                 uint64_t pc, size_t depth)
{
    trace_add_entry(t, timestamp, pc, depth, UFTRACE_ENTRY);
}

/* Record leaving the function at pc, at the given call depth. */
static void trace_exit_function(Trace *t, uint64_t timestamp,
                                uint64_t pc, size_t depth)
{
    trace_add_entry(t, timestamp, pc, depth, UFTRACE_EXIT);
}

/* Emit entry records for a whole callstack, bottom (depth 1) first. */
static void trace_enter_stack(Trace *t, Callstack *cs, uint64_t timestamp)
{
    for (size_t depth = 1; depth <= callstack_depth(cs); ++depth) {
        trace_enter_function(t, timestamp, callstack_at(cs, depth)->pc, depth);
    }
}

/* Emit exit records for a whole callstack, top first. */
static void trace_exit_stack(Trace *t, Callstack *cs, uint64_t timestamp)
{
    for (size_t depth = callstack_depth(cs); depth > 0; --depth) {
        trace_exit_function(t, timestamp, callstack_at(cs, depth)->pc, depth);
    }
}
+
+static uint64_t cpu_read_register64(Cpu *cpu, struct qemu_plugin_register *reg)
+{
+ GByteArray *buf = cpu->buf;
+ g_byte_array_set_size(buf, 0);
+ size_t sz = qemu_plugin_read_register(reg, buf);
+ g_assert(sz == 8);
+ g_assert(buf->len == 8);
+ return *((uint64_t *) buf->data);
+}
+
+static uint32_t cpu_read_register32(Cpu *cpu, struct qemu_plugin_register *reg)
+{
+ GByteArray *buf = cpu->buf;
+ g_byte_array_set_size(buf, 0);
+ size_t sz = qemu_plugin_read_register(reg, buf);
+ g_assert(sz == 4);
+ g_assert(buf->len == 4);
+ return *((uint32_t *) buf->data);
+}
+
+static uint64_t cpu_read_memory64(Cpu *cpu, uint64_t addr)
+{
+ g_assert(addr);
+ GByteArray *buf = cpu->buf;
+ g_byte_array_set_size(buf, 0);
+ bool read = qemu_plugin_read_memory_vaddr(addr, buf, 8);
+ if (!read) {
+ return 0;
+ }
+ g_assert(buf->len == 8);
+ return *((uint64_t *) buf->data);
+}
+
+static void cpu_unwind_stack(Cpu *cpu, uint64_t frame_pointer, uint64_t pc)
+{
+ g_assert(callstack_empty(cpu->cs));
+
+ #define UNWIND_STACK_MAX_DEPTH 1024
+ CallstackEntry unwind[UNWIND_STACK_MAX_DEPTH];
+ size_t depth = 0;
+ do {
+ /* check we don't have an infinite stack */
+ for (size_t i = 0; i < depth; ++i) {
+ if (frame_pointer == unwind[i].frame_pointer) {
+ break;
+ }
+ }
+ CallstackEntry e = {.frame_pointer = frame_pointer, .pc = pc};
+ unwind[depth] = e;
+ depth++;
+ if (frame_pointer) {
+ frame_pointer = cpu_read_memory64(cpu, frame_pointer);
+ }
+ pc = cpu_read_memory64(cpu, frame_pointer + 8); /* read previous lr */
+ } while (frame_pointer && pc && depth < UNWIND_STACK_MAX_DEPTH);
+ #undef UNWIND_STACK_MAX_DEPTH
+
+ /* push it from bottom to top */
+ while (depth) {
+ callstack_push(cpu->cs, unwind[depth - 1]);
+ --depth;
+ }
+}
+
+static struct qemu_plugin_register *plugin_find_register(const char *name)
+{
+ g_autoptr(GArray) regs = qemu_plugin_get_registers();
+ for (int i = 0; i < regs->len; ++i) {
+ qemu_plugin_reg_descriptor *reg;
+ reg = &g_array_index(regs, qemu_plugin_reg_descriptor, i);
+ if (!strcmp(reg->name, name)) {
+ return reg->handle;
+ }
+ }
+ return NULL;
+}
+
+static uint8_t aarch64_num_privilege_levels(void)
+{
+ return AARCH64_PRIVILEGE_LEVEL_MAX;
+}
+
+static const char *aarch64_get_privilege_level_name(uint8_t pl)
+{
+ switch (pl) {
+ case AARCH64_EL0_SECURE: return "S-EL0";
+ case AARCH64_EL0_NONSECURE: return "NS-EL0";
+ case AARCH64_EL0_REALM: return "R-EL0";
+ case AARCH64_EL1_SECURE: return "S-EL1";
+ case AARCH64_EL1_NONSECURE: return "NS-EL1";
+ case AARCH64_EL1_REALM: return "R-EL1";
+ case AARCH64_EL2_SECURE: return "S-EL2";
+ case AARCH64_EL2_NONSECURE: return "NS-EL2";
+ case AARCH64_EL2_REALM: return "R-EL2";
+ case AARCH64_EL3: return "EL3";
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static uint8_t aarch64_get_privilege_level(Cpu *cpu_)
+{
+ Aarch64Cpu *cpu = cpu_->arch;
+ /*
+ * QEMU gdbstub does not provide access to CurrentEL,
+ * so we use CPSR instead.
+ */
+ uint8_t el = cpu_read_register32(cpu_, cpu->reg_cpsr) >> 2 & 0b11;
+
+ if (el == 3) {
+ return AARCH64_EL3;
+ }
+
+ uint8_t ss = AARCH64_EL0_SECURE;
+ if (!cpu->reg_scr_el3) {
+ ss = AARCH64_EL0_NONSECURE;
+ }
+ uint64_t scr_el3 = cpu_read_register64(cpu_, cpu->reg_scr_el3);
+ uint64_t ns = (scr_el3 >> 0) & 0b1;
+ uint64_t nse = (scr_el3 >> 62) & 0b1;
+ switch (nse << 1 | ns) {
+ case 0b00:
+ ss = AARCH64_EL0_SECURE;
+ break;
+ case 0b01:
+ ss = AARCH64_EL0_NONSECURE;
+ break;
+ case 0b11:
+ ss = AARCH64_EL0_REALM;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ const uint8_t num_ss = 3;
+ Aarch64PrivilegeLevel pl = el * num_ss + ss;
+ return pl;
+}
+
+static uint64_t aarch64_get_frame_pointer(Cpu *cpu_)
+{
+ Aarch64Cpu *cpu = cpu_->arch;
+ return cpu_read_register64(cpu_, cpu->reg_fp);
+}
+
+static void aarch64_init(Cpu *cpu_)
+{
+ Aarch64Cpu *cpu = g_new0(Aarch64Cpu, 1);
+ cpu_->arch = cpu;
+ cpu->reg_fp = plugin_find_register("x29");
+ if (!cpu->reg_fp) {
+ fprintf(stderr, "uftrace plugin: frame pointer register (x29) is not "
+ "available. Please use an AArch64 cpu (or -cpu max).\n");
+ g_abort();
+ }
+ cpu->reg_cpsr = plugin_find_register("cpsr");
+ g_assert(cpu->reg_cpsr);
+ cpu->reg_scr_el3 = plugin_find_register("SCR_EL3");
+ /* scr_el3 is optional */
+}
+
+static void aarch64_end(Cpu *cpu)
+{
+ g_free(cpu->arch);
+}
+
+static bool aarch64_does_insn_modify_frame_pointer(const char *disas)
+{
+ /*
+ * Check if current instruction concerns fp register "x29".
+ * We add a prefix space to make sure we don't match addresses dump
+ * in disassembly.
+ */
+ return strstr(disas, " x29");
+}
+
+static CpuOps aarch64_ops = {
+ .init = aarch64_init,
+ .end = aarch64_end,
+ .get_frame_pointer = aarch64_get_frame_pointer,
+ .get_privilege_level = aarch64_get_privilege_level,
+ .num_privilege_levels = aarch64_num_privilege_levels,
+ .get_privilege_level_name = aarch64_get_privilege_level_name,
+ .does_insn_modify_frame_pointer = aarch64_does_insn_modify_frame_pointer,
+};
+
+static uint8_t x64_num_privilege_levels(void)
+{
+ return X64_PRIVILEGE_LEVEL_MAX;
+}
+
+static const char *x64_get_privilege_level_name(uint8_t pl)
+{
+ switch (pl) {
+ case X64_RING0: return "Ring0";
+ case X64_RING1: return "Ring1";
+ case X64_RING2: return "Ring2";
+ case X64_RING3: return "Ring3";
+ case X64_REAL_MODE: return "RealMode";
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static uint8_t x64_get_privilege_level(Cpu *cpu_)
+{
+ X64Cpu *cpu = cpu_->arch;
+ uint64_t cr0 = cpu_read_register64(cpu_, cpu->reg_cr0);
+ uint64_t protected_mode = (cr0 >> 0) & 0b1;
+ if (!protected_mode) {
+ return X64_REAL_MODE;
+ }
+ uint32_t cs = cpu_read_register32(cpu_, cpu->reg_cs);
+ uint32_t ring_level = (cs >> 0) & 0b11;
+ return ring_level;
+}
+
+static uint64_t x64_get_frame_pointer(Cpu *cpu_)
+{
+ X64Cpu *cpu = cpu_->arch;
+ return cpu_read_register64(cpu_, cpu->reg_rbp);
+}
+
+static void x64_init(Cpu *cpu_)
+{
+ X64Cpu *cpu = g_new0(X64Cpu, 1);
+ cpu_->arch = cpu;
+ cpu->reg_rbp = plugin_find_register("rbp");
+ g_assert(cpu->reg_rbp);
+ cpu->reg_cs = plugin_find_register("cs");
+ g_assert(cpu->reg_cs);
+ cpu->reg_cr0 = plugin_find_register("cr0");
+ g_assert(cpu->reg_cr0);
+}
+
+static void x64_end(Cpu *cpu)
+{
+ g_free(cpu->arch);
+}
+
+static bool x64_does_insn_modify_frame_pointer(const char *disas)
+{
+ return strstr(disas, "rbp");
+}
+
+static CpuOps x64_ops = {
+ .init = x64_init,
+ .end = x64_end,
+ .get_frame_pointer = x64_get_frame_pointer,
+ .get_privilege_level = x64_get_privilege_level,
+ .num_privilege_levels = x64_num_privilege_levels,
+ .get_privilege_level_name = x64_get_privilege_level_name,
+ .does_insn_modify_frame_pointer = x64_does_insn_modify_frame_pointer,
+};
+
/*
 * TB-entry callback, only registered when trace-privilege-level=on.
 * When the vcpu's privilege level changed since the last TB, close the
 * open call stack in the previous trace, switch to the trace of the new
 * level, and rebuild the stack there by unwinding frame pointers.
 */
static void track_privilege_change(unsigned int cpu_index, void *udata)
{
    Cpu *cpu = qemu_plugin_scoreboard_find(score, cpu_index);
    uint8_t new_pl = cpu->ops.get_privilege_level(cpu);

    if (new_pl == cpu->privilege_level) {
        return;
    }

    /* udata carries the TB's start pc */
    uint64_t pc = (uintptr_t) udata;
    uint64_t timestamp = gettime_ns();

    /* close all functions still open in the previous trace */
    trace_exit_stack(cpu->trace, cpu->cs, timestamp);
    callstack_clear(cpu->cs);

    /* cpu->traces has one entry per privilege level in this mode */
    cpu->privilege_level = new_pl;
    cpu->trace = g_array_index(cpu->traces, Trace*, new_pl);

    /* reopen the stack as seen from the new privilege level */
    cpu_unwind_stack(cpu, cpu->ops.get_frame_pointer(cpu), pc);
    trace_enter_stack(cpu->trace, cpu->cs, timestamp);
}
+
/*
 * Per-instruction callback (on instructions following a potential frame
 * pointer update). Compares the current frame pointer against the recorded
 * stack to classify execution as: same function, call, return, or a
 * discontinuity, and emits the matching uftrace records.
 */
static void track_callstack(unsigned int cpu_index, void *udata)
{
    uint64_t pc = (uintptr_t) udata;
    Cpu *cpu = qemu_plugin_scoreboard_find(score, cpu_index);
    uint64_t timestamp = gettime_ns();
    Callstack *cs = cpu->cs;
    Trace *t = cpu->trace;

    uint64_t fp = cpu->ops.get_frame_pointer(cpu);
    if (!fp && callstack_empty(cs)) {
        /*
         * We simply push current pc. Note that we won't detect symbol change as
         * long as a proper call does not happen.
         */
        callstack_push(cs, (CallstackEntry){.frame_pointer = fp, .pc = pc});
        trace_enter_function(t, timestamp, pc, callstack_depth(cs));
        return;
    }

    CallstackEntry top = callstack_top(cs);
    if (fp == top.frame_pointer) {
        /* same function */
        return;
    }

    CallstackEntry caller = callstack_caller(cs);
    if (fp == caller.frame_pointer) {
        /* return */
        CallstackEntry e = callstack_pop(cs);
        trace_exit_function(t, timestamp, e.pc, callstack_depth(cs));
        return;
    }

    /* [fp] holds the caller's frame pointer if this was a proper call */
    uint64_t caller_fp = fp ? cpu_read_memory64(cpu, fp) : 0;
    if (caller_fp == top.frame_pointer) {
        /* call */
        callstack_push(cs, (CallstackEntry){.frame_pointer = fp, .pc = pc});
        trace_enter_function(t, timestamp, pc, callstack_depth(cs));
        return;
    }

    /* discontinuity, exit current stack and unwind new one */
    trace_exit_stack(t, cs, timestamp);
    callstack_clear(cs);

    cpu_unwind_stack(cpu, fp, pc);
    trace_enter_stack(t, cs, timestamp);
}
+
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+ size_t n_insns = qemu_plugin_tb_n_insns(tb);
+ uintptr_t tb_pc = qemu_plugin_tb_vaddr(tb);
+
+ if (trace_privilege_level) {
+ qemu_plugin_register_vcpu_tb_exec_cb(tb, track_privilege_change,
+ QEMU_PLUGIN_CB_R_REGS,
+ (void *) tb_pc);
+ }
+
+ /*
+ * Callbacks and inline instrumentation are inserted before an instruction.
+ * Thus, to see instruction effect, we need to wait for next one.
+ * Potentially, the last instruction of a block could modify the frame
+ * pointer. Thus, we need to always instrument first instruction in a tb.
+ */
+ bool instrument_insn = true;
+ for (size_t i = 0; i < n_insns; i++) {
+ struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
+
+ if (instrument_insn) {
+ uintptr_t pc = qemu_plugin_insn_vaddr(insn);
+ qemu_plugin_register_vcpu_insn_exec_cb(insn, track_callstack,
+ QEMU_PLUGIN_CB_R_REGS,
+ (void *) pc);
+ instrument_insn = false;
+ }
+
+ char *disas = qemu_plugin_insn_disas(insn);
+ if (arch_ops.does_insn_modify_frame_pointer(disas)) {
+ instrument_insn = true;
+ }
+ }
+}
+
/*
 * Per-vcpu setup: allocate the scratch buffer, the callstack, and one
 * trace per privilege level (or a single trace when privilege tracking
 * is off), then create/truncate the on-disk trace files.
 */
static void vcpu_init(qemu_plugin_id_t id, unsigned int vcpu_index)
{
    Cpu *cpu = qemu_plugin_scoreboard_find(score, vcpu_index);
    cpu->ops = arch_ops;

    cpu->ops.init(cpu);
    cpu->buf = g_byte_array_new();
    cpu->traces = g_array_new(0, 0, sizeof(Trace *));

    g_assert(vcpu_index < UINT32_MAX / TRACE_ID_SCALE);
    g_assert(cpu->ops.num_privilege_levels() < TRACE_ID_SCALE);
    /*
     * trace_id is: cpu_number * TRACE_ID_SCALE + privilege_level,
     * starting from (vcpu_index + 1) so that no trace ever gets tid 0.
     */
    uint32_t trace_id = (vcpu_index + 1) * TRACE_ID_SCALE;

    if (trace_privilege_level) {
        for (uint8_t pl = 0; pl < cpu->ops.num_privilege_levels(); ++pl) {
            g_autoptr(GString) trace_name = g_string_new(NULL);
            g_string_append_printf(trace_name, "cpu%u %s", vcpu_index,
                                   cpu->ops.get_privilege_level_name(pl));
            Trace *t = trace_new(trace_id + pl, trace_name);
            g_array_append_val(cpu->traces, t);
        }
    } else {
        g_autoptr(GString) trace_name = g_string_new(NULL);
        g_string_append_printf(trace_name, "cpu%u", vcpu_index);
        Trace *t = trace_new(trace_id, trace_name);
        g_array_append_val(cpu->traces, t);
    }

    for (size_t i = 0; i < cpu->traces->len; ++i) {
        /* create/truncate trace files */
        Trace *t = g_array_index(cpu->traces, Trace*, i);
        trace_flush(t, false);
    }

    cpu->cs = callstack_new();
    /* privilege_level is zero-initialized by the scoreboard; index 0 exists */
    cpu->trace = g_array_index(cpu->traces, Trace*, cpu->privilege_level);
}
+
+static void vcpu_end(unsigned int vcpu_index)
+{
+ Cpu *cpu = qemu_plugin_scoreboard_find(score, vcpu_index);
+ g_byte_array_free(cpu->buf, true);
+
+ for (size_t i = 0; i < cpu->traces->len; ++i) {
+ Trace *t = g_array_index(cpu->traces, Trace*, i);
+ trace_free(t);
+ }
+
+ g_array_free(cpu->traces, true);
+ callstack_free(cpu->cs);
+ memset(cpu, 0, sizeof(Cpu));
+}
+
+static void at_exit(qemu_plugin_id_t id, void *data)
+{
+ bool system_emulation = (bool) data;
+ g_autoptr(GArray) traces = g_array_new(0, 0, sizeof(Trace *));
+
+ for (size_t i = 0; i < qemu_plugin_num_vcpus(); ++i) {
+ Cpu *cpu = qemu_plugin_scoreboard_find(score, i);
+ for (size_t j = 0; j < cpu->traces->len; ++j) {
+ Trace *t = g_array_index(cpu->traces, Trace*, j);
+ trace_flush(t, true);
+ g_array_append_val(traces, t);
+ }
+ }
+
+ uftrace_write_map(system_emulation);
+ uftrace_write_info(traces);
+ uftrace_write_task(traces);
+
+ for (size_t i = 0; i < qemu_plugin_num_vcpus(); ++i) {
+ vcpu_end(i);
+ }
+
+ qemu_plugin_scoreboard_free(score);
+}
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
+ const qemu_info_t *info,
+ int argc, char **argv)
+{
+ for (int i = 0; i < argc; i++) {
+ char *opt = argv[i];
+ g_auto(GStrv) tokens = g_strsplit(opt, "=", 2);
+ if (g_strcmp0(tokens[0], "trace-privilege-level") == 0) {
+ if (!qemu_plugin_bool_parse(tokens[0], tokens[1],
+ &trace_privilege_level)) {
+ fprintf(stderr, "boolean argument parsing failed: %s\n", opt);
+ return -1;
+ }
+ } else {
+ fprintf(stderr, "option parsing failed: %s\n", opt);
+ return -1;
+ }
+ }
+
+ if (!strcmp(info->target_name, "aarch64")) {
+ arch_ops = aarch64_ops;
+ } else if (!strcmp(info->target_name, "x86_64")) {
+ arch_ops = x64_ops;
+ } else {
+ fprintf(stderr, "plugin uftrace: %s target is not supported\n",
+ info->target_name);
+ return 1;
+ }
+
+ score = qemu_plugin_scoreboard_new(sizeof(Cpu));
+ qemu_plugin_register_vcpu_init_cb(id, vcpu_init);
+ qemu_plugin_register_atexit_cb(id, at_exit, (void *) info->system_emulation);
+ qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
+
+ return 0;
+}
diff --git a/contrib/plugins/uftrace_symbols.py b/contrib/plugins/uftrace_symbols.py
new file mode 100755
index 0000000..b49e032
--- /dev/null
+++ b/contrib/plugins/uftrace_symbols.py
@@ -0,0 +1,152 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Create symbols and mapping files for uftrace.
+#
+# Copyright 2025 Linaro Ltd
+# Author: Pierrick Bouvier <pierrick.bouvier@linaro.org>
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+import argparse
+import elftools # pip install pyelftools
+import os
+
+from elftools.elf.elffile import ELFFile
+from elftools.elf.sections import SymbolTableSection
+
def elf_func_symbols(elf):
    """Return non-empty STT_FUNC/STT_NOTYPE symbols of elf, sorted by address.

    Zero-sized symbols are skipped, as they cannot map an address range.
    """
    symbols = []
    for section in elf.iter_sections():
        if not isinstance(section, SymbolTableSection):
            continue
        for symbol in section.iter_symbols():
            if symbol_size(symbol) == 0:
                continue
            # fix: 'type' shadowed the builtin; also dropped the unused
            # (idx, section) and enumerate() index bookkeeping
            sym_type = symbol['st_info']['type']
            if sym_type in ('STT_FUNC', 'STT_NOTYPE'):
                symbols.append(symbol)
    symbols.sort(key=symbol_addr)
    return symbols
+
def symbol_size(symbol):
    """Size in bytes of *symbol*."""
    return symbol['st_size']

def symbol_addr(symbol):
    """Address of *symbol*, clamped to 48 bits like uftrace entries."""
    return symbol['st_value'] & 0xffffffffffff

def symbol_name(symbol):
    """Name of *symbol*."""
    return symbol.name
+
class BinaryFile:
    """An ELF binary plus the offset at which it is mapped in the trace.

    Symbols are extracted eagerly in __init__, so the underlying file is
    only needed during construction.
    """

    def __init__(self, path, map_offset):
        # real (symlink-resolved) path, also used to name the .sym file
        self.fullpath = os.path.realpath(path)
        # offset added to symbol addresses in the generated map
        self.map_offset = map_offset
        with open(path, 'rb') as f:
            # NOTE(review): self.elf keeps a reference to a stream that is
            # closed once this 'with' exits; only self.symbols (materialized
            # here) is safe to use afterwards - confirm self.elf is needed.
            self.elf = ELFFile(f)
            self.symbols = elf_func_symbols(self.elf)

    def path(self):
        """Return the resolved path of the binary."""
        return self.fullpath

    def addr_start(self):
        """First mapped address (the raw mapping offset)."""
        return self.map_offset

    def addr_end(self):
        """End of the last (highest) symbol, relocated by the map offset."""
        last_sym = self.symbols[-1]
        return symbol_addr(last_sym) + symbol_size(last_sym) + self.map_offset

    def generate_symbol_file(self, prefix_symbols):
        """Write ./uftrace.data/<binary>.sym in uftrace symbol format."""
        binary_name = os.path.basename(self.fullpath)
        sym_file_path = f'./uftrace.data/{binary_name}.sym'
        print(f'{sym_file_path} ({len(self.symbols)} symbols)')
        with open(sym_file_path, 'w') as sym_file:
            # print hexadecimal addresses on 48 bits
            addrx = "0>12x"
            for s in self.symbols:
                addr = symbol_addr(s)
                addr = f'{addr:{addrx}}'
                size = f'{symbol_size(s):{addrx}}'
                name = symbol_name(s)
                if prefix_symbols:
                    # disambiguate identical symbols from different binaries
                    name = f'{binary_name}:{name}'
                print(addr, size, 'T', name, file=sym_file)
+
def parse_parameter(p):
    """Parse an 'elf_path[:0xoffset]' command-line parameter.

    Returns (path, offset) with offset defaulting to 0. Raises ValueError
    when more than one offset is given or when it is not an 0x-prefixed
    hexadecimal constant.
    """
    s = p.split(':')
    path = s[0]
    if len(s) == 1:
        return path, 0
    if len(s) > 2:
        raise ValueError('only one offset can be set')
    offset = s[1]
    if not offset.startswith('0x'):
        # fix: grammar of the user-facing message
        # (was "an hexadecimal" / "should starts")
        err = f'offset "{offset}" is not a hexadecimal constant. '
        err += 'It should start with "0x".'
        raise ValueError(err)
    return path, int(offset, 16)
+
def is_from_user_mode(map_file_path):
    """Return True if an existing map file was generated by qemu-user.

    System-mode maps start with a recognizable comment; anything else in an
    existing file is assumed to come from a user-mode run. Missing files
    yield False.
    """
    if not os.path.exists(map_file_path):
        return False
    with open(map_file_path, 'r') as map_file:
        first_line = map_file.readline()
    return not first_line.startswith('# map stack on')
+
def generate_map(binaries):
    """Write ./uftrace.data/sid-0.map describing where binaries are mapped."""
    map_file_path = './uftrace.data/sid-0.map'

    if is_from_user_mode(map_file_path):
        print(f'do not overwrite {map_file_path} generated from qemu-user')
        return

    # print hexadecimal addresses on 48 bits
    addrx = "0>12x"

    mappings = [
        '# map stack on highest address possible, to prevent uftrace',
        '# from considering any kernel address',
        'ffffffffffff-ffffffffffff rw-p 00000000 00:00 0 [stack]',
    ]
    for b in binaries:
        mappings.append(f'{b.addr_start():{addrx}}-{b.addr_end():{addrx}}'
                        f' r--p 00000000 00:00 0 {b.path()}')

    content = '\n'.join(mappings)
    with open(map_file_path, 'w') as map_file:
        print(content, file=map_file)
    print(f'{map_file_path}')
    print(content)
+
def main():
    """Command-line entry point: generate .sym files and the uftrace map."""
    parser = argparse.ArgumentParser(description=
                                     'generate symbol files for uftrace')
    parser.add_argument('elf_file', nargs='+',
                        help='path to an ELF file. '
                        'Use /path/to/file:0xdeadbeef to add a mapping offset.')
    parser.add_argument('--prefix-symbols',
                        help='prepend binary name to symbols',
                        action=argparse.BooleanOptionalAction)
    args = parser.parse_args()

    # fix: replace exists()+mkdir() race with makedirs(exist_ok=True)
    os.makedirs('./uftrace.data', exist_ok=True)

    binaries = []
    for file in args.elf_file:
        path, offset = parse_parameter(file)
        binaries.append(BinaryFile(path, offset))
    # keep map entries ordered by increasing address
    # (fix: dropped stray trailing semicolon)
    binaries.sort(key=lambda b: b.addr_end())

    for b in binaries:
        b.generate_symbol_file(args.prefix_symbols)

    generate_map(binaries)

if __name__ == '__main__':
    main()
diff --git a/docs/about/emulation.rst b/docs/about/emulation.rst
index 456d01d..8a5e128 100644
--- a/docs/about/emulation.rst
+++ b/docs/about/emulation.rst
@@ -816,6 +816,205 @@ This plugin can limit the number of Instructions Per Second that are executed::
The lower the number the more accurate time will be, but the less efficient the plugin.
Defaults to ips/10
+Uftrace
+.......
+
+``contrib/plugins/uftrace.c``
+
+This plugin generates a binary trace compatible with
+`uftrace <https://github.com/namhyung/uftrace>`_.
+
+The plugin supports aarch64 and x64, and works in user and system mode, allowing
+you to trace a system boot, which is not usually possible.
+
+In user mode, the memory mapping is directly copied from ``/proc/self/maps`` at
+the end of execution. Uftrace should be able to retrieve symbols by itself,
+without any additional step.
+In system mode, the default memory mapping is empty, and you can generate
+one (and associated symbols) using ``contrib/plugins/uftrace_symbols.py``.
+Symbols must be present in ELF binaries.
+
+It tracks the call stack (based on frame pointer analysis). Thus, your program
+and its dependencies must be compiled using ``-fno-omit-frame-pointer
+-mno-omit-leaf-frame-pointer``. In 2024, `Ubuntu and Fedora enabled it by
+default again on x64
+<https://www.brendangregg.com/blog/2024-03-17/the-return-of-the-frame-pointers.html>`_.
+On aarch64, this is less of a problem, as they are usually part of the ABI,
+except for leaf functions. That's true for user space applications, but not
+necessarily for bare metal code. You can read this `section
+<uftrace_build_system_example>` to easily build a system with frame pointers.
+
+When tracing long scenarios (> 1 min), the generated trace can become very long,
+making it hard to extract data from it. In this case, a simple solution is to
+trace execution while generating a timestamped output log using
+``qemu-system-aarch64 ... | ts "%s"``. Then, ``uftrace --time-range=start~end``
+can be used to reduce trace for only this part of execution.
+
+Performance-wise, the overhead compared to normal TCG execution is around x5-x15.
+
+.. list-table:: Uftrace plugin arguments
+ :widths: 20 80
+ :header-rows: 1
+
+ * - Option
+ - Description
+ * - trace-privilege-level=[on|off]
+ - Generate separate traces for each privilege level (Exception Level +
+ Security State on aarch64, Rings on x64).
+
+.. list-table:: uftrace_symbols.py arguments
+ :widths: 20 80
+ :header-rows: 1
+
+ * - Option
+ - Description
+ * - elf_file [elf_file ...]
+ - path to an ELF file. Use /path/to/file:0xdeadbeef to add a mapping offset.
+ * - --prefix-symbols
+ - prepend binary name to symbols
+
+Example user trace
+++++++++++++++++++
+
+As an example, we can trace qemu itself running git::
+
+ $ ./build/qemu-aarch64 -plugin \
+ build/contrib/plugins/libuftrace.so \
+ ./build/qemu-aarch64 /usr/bin/git --help
+
+ # and generate a chrome trace directly
+ $ uftrace dump --chrome | gzip > ~/qemu_aarch64_git_help.json.gz
+
+For convenience, you can download this trace `qemu_aarch64_git_help.json.gz
+<https://fileserver.linaro.org/s/N8X8fnZ5yGRZLsT/download/qemu_aarch64_git_help.json.gz>`_.
+Download it and open this trace on https://ui.perfetto.dev/. You can zoom in/out
+using :kbd:`W`, :kbd:`A`, :kbd:`S`, :kbd:`D` keys.
+Some sequences taken from this trace:
+
+- Loading program and its interpreter
+
+.. image:: https://fileserver.linaro.org/s/fie8JgX76yyL5cq/preview
+ :height: 200px
+
+- open syscall
+
+.. image:: https://fileserver.linaro.org/s/rsXPTeZZPza4PcE/preview
+ :height: 200px
+
+- TB creation
+
+.. image:: https://fileserver.linaro.org/s/GXY6NKMw5EeRCew/preview
+ :height: 200px
+
+It's usually better to use ``uftrace record`` directly. However, tracing
+binaries through qemu-user can be convenient when you don't want to recompile
+them (``uftrace record`` requires instrumentation), as long as symbols are
+present.
+
+Example system trace
+++++++++++++++++++++
+
+A full trace example (chrome trace, from instructions below) generated from a
+system boot can be found `here
+<https://fileserver.linaro.org/s/WsemLboPEzo24nw/download/aarch64_boot.json.gz>`_.
+Download it and open this trace on https://ui.perfetto.dev/. You can see code
+executed for all privilege levels, and zoom in/out using
+:kbd:`W`, :kbd:`A`, :kbd:`S`, :kbd:`D` keys. You can find below some sequences
+taken from this trace:
+
+- First two stages of the boot sequence in Arm Trusted Firmware (EL3 and S-EL1)
+
+.. image:: https://fileserver.linaro.org/s/kkxBS552W7nYESX/preview
+ :height: 200px
+
+- U-boot initialization (until code relocation, after which we can't track it)
+
+.. image:: https://fileserver.linaro.org/s/LKTgsXNZFi5GFNC/preview
+ :height: 200px
+
+- Stat and open syscalls in kernel
+
+.. image:: https://fileserver.linaro.org/s/dXe4MfraKg2F476/preview
+ :height: 200px
+
+- Timer interrupt
+
+.. image:: https://fileserver.linaro.org/s/TM5yobYzJtP7P3C/preview
+ :height: 200px
+
+- Poweroff sequence (from kernel back to firmware, NS-EL2 to EL3)
+
+.. image:: https://fileserver.linaro.org/s/oR2PtyGKJrqnfRf/preview
+ :height: 200px
+
+Build and run system example
+++++++++++++++++++++++++++++
+
+.. _uftrace_build_system_example:
+
+Building a full system image with frame pointers is not trivial.
+
+We provide a `simple way <https://github.com/pbo-linaro/qemu-linux-stack>`_ to
+build an aarch64 system, combining Arm Trusted firmware, U-boot, Linux kernel
+and debian userland. It's based on containers (``podman`` only) and
+``qemu-user-static (binfmt)`` to make sure it's easily reproducible and does not depend
+on the machine where you build it.
+
+You can follow the exact same instructions for a x64 system, combining edk2,
+Linux, and Ubuntu, simply by switching to
+`x86_64 <https://github.com/pbo-linaro/qemu-linux-stack/tree/x86_64>`_ branch.
+
+To build the system::
+
+ # Install dependencies
+ $ sudo apt install -y podman qemu-user-static
+
+ $ git clone https://github.com/pbo-linaro/qemu-linux-stack
+ $ cd qemu-linux-stack
+ $ ./build.sh
+
+ # system can be started using:
+ $ ./run.sh /path/to/qemu-system-aarch64
+
+To generate a uftrace for a system boot from that::
+
+ # run true and poweroff the system
+ $ env INIT=true ./run.sh path/to/qemu-system-aarch64 \
+ -plugin path/to/contrib/plugins/libuftrace.so,trace-privilege-level=on
+
+ # generate symbols and memory mapping
+ $ path/to/contrib/plugins/uftrace_symbols.py \
+ --prefix-symbols \
+ arm-trusted-firmware/build/qemu/debug/bl1/bl1.elf \
+ arm-trusted-firmware/build/qemu/debug/bl2/bl2.elf \
+ arm-trusted-firmware/build/qemu/debug/bl31/bl31.elf \
+ u-boot/u-boot:0x60000000 \
+ linux/vmlinux
+
+ # inspect trace with
+ $ uftrace replay
+
+Uftrace allows filtering the trace, and dumping flamegraphs or a chrome trace.
+This last one is very interesting to see visually the boot process::
+
+ $ uftrace dump --chrome > boot.json
+ # Open your browser, and load boot.json on https://ui.perfetto.dev/.
+
+Long visual chrome traces can't be easily opened, thus, it might be
+interesting to generate them around a particular point of execution::
+
+ # execute qemu and timestamp output log
+ $ env INIT=true ./run.sh path/to/qemu-system-aarch64 \
+ -plugin path/to/contrib/plugins/libuftrace.so,trace-privilege-level=on |&
+ ts "%s" | tee exec.log
+
+ $ cat exec.log | grep 'Run /init'
+ 1753122320 [ 11.834391] Run /init as init process
+ # init was launched at 1753122320
+
+ # generate trace around init execution (2 seconds):
+ $ uftrace dump --chrome --time-range=1753122320~1753122322 > init.json
+
Other emulation features
------------------------
diff --git a/docs/devel/build-system.rst b/docs/devel/build-system.rst
index 2c88419..6204aa6 100644
--- a/docs/devel/build-system.rst
+++ b/docs/devel/build-system.rst
@@ -450,7 +450,7 @@ are run with ``make bench``. Meson test suites such as ``unit`` can be ran
with ``make check-unit``, and ``make check-tcg`` builds and runs "non-Meson"
tests for all targets.
-If desired, it is also possible to use ``ninja`` and ``meson test``,
+If desired, it is also possible to use ``ninja`` and ``pyvenv/bin/meson test``,
respectively to build emulators and run tests defined in meson.build.
The main difference is that ``make`` needs the ``-jN`` flag in order to
enable parallel builds or tests.
diff --git a/docs/devel/code-provenance.rst b/docs/devel/code-provenance.rst
index b5aae2e..8cdc56f 100644
--- a/docs/devel/code-provenance.rst
+++ b/docs/devel/code-provenance.rst
@@ -285,8 +285,8 @@ Such tools are acceptable to use, provided there is clearly defined copyright
and licensing for their output. Note in particular the caveats applying to AI
content generators below.
-Use of AI content generators
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Use of AI-generated content
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
TL;DR:
@@ -294,6 +294,10 @@ TL;DR:
believed to include or derive from AI generated content. This includes
ChatGPT, Claude, Copilot, Llama and similar tools.**
+ **This policy does not apply to other uses of AI, such as researching APIs
+ or algorithms, static analysis, or debugging, provided their output is not
+ included in contributions.**
+
The increasing prevalence of AI-assisted software development results in a
number of difficult legal questions and risks for software projects, including
QEMU. Of particular concern is content generated by `Large Language Models
@@ -322,17 +326,24 @@ The QEMU project thus requires that contributors refrain from using AI content
generators on patches intended to be submitted to the project, and will
decline any contribution if use of AI is either known or suspected.
-This policy does not apply to other uses of AI, such as researching APIs or
-algorithms, static analysis, or debugging, provided their output is not to be
-included in contributions.
-
Examples of tools impacted by this policy includes GitHub's CoPilot, OpenAI's
ChatGPT, Anthropic's Claude, and Meta's Code Llama, and code/content
generation agents which are built on top of such tools.
This policy may evolve as AI tools mature and the legal situation is
-clarifed. In the meanwhile, requests for exceptions to this policy will be
-evaluated by the QEMU project on a case by case basis. To be granted an
-exception, a contributor will need to demonstrate clarity of the license and
-copyright status for the tool's output in relation to its training model and
-code, to the satisfaction of the project maintainers.
+clarified.
+
+Exceptions
+^^^^^^^^^^
+
+The QEMU project welcomes discussion on any exceptions to this policy,
+or more general revisions. This can be done by contacting the qemu-devel
+mailing list with details of a proposed tool, model, usage scenario, etc.
+that is beneficial to QEMU, while still mitigating issues around compliance
+with the DCO. After discussion, any exception will be listed below.
+
+Exceptions do not remove the need for authors to comply with all other
+requirements for contribution. In particular, the "Signed-off-by"
+label in a patch submission is a statement that the author takes
+responsibility for the entire contents of the patch, including any parts
+that were generated or assisted by AI tools or other tools.
diff --git a/docs/devel/memory.rst b/docs/devel/memory.rst
index 42d3ca2..f22146e 100644
--- a/docs/devel/memory.rst
+++ b/docs/devel/memory.rst
@@ -165,17 +165,14 @@ and finalized one by one. The order in which memory regions will be
finalized is not guaranteed.
If however the memory region is part of a dynamically allocated data
-structure, you should call object_unparent() to destroy the memory region
-before the data structure is freed. For an example see VFIOMSIXInfo
-and VFIOQuirk in hw/vfio/pci.c.
+structure, you should free the memory region in the instance_finalize
+callback. For an example see VFIOMSIXInfo and VFIOQuirk in
+hw/vfio/pci.c.
You must not destroy a memory region as long as it may be in use by a
device or CPU. In order to do this, as a general rule do not create or
-destroy memory regions dynamically during a device's lifetime, and only
-call object_unparent() in the memory region owner's instance_finalize
-callback. The dynamically allocated data structure that contains the
-memory region then should obviously be freed in the instance_finalize
-callback as well.
+destroy memory regions dynamically during a device's lifetime, and never
+call object_unparent().
If you break this rule, the following situation can happen:
@@ -201,9 +198,7 @@ this exception is rarely necessary, and therefore it is discouraged,
but nevertheless it is used in a few places.
For regions that "have no owner" (NULL is passed at creation time), the
-machine object is actually used as the owner. Since instance_finalize is
-never called for the machine object, you must never call object_unparent
-on regions that have no owner, unless they are aliases or containers.
+machine object is actually used as the owner.
Overlapping regions and priority
diff --git a/docs/devel/rust.rst b/docs/devel/rust.rst
index 13a20e8..2f0ab2e 100644
--- a/docs/devel/rust.rst
+++ b/docs/devel/rust.rst
@@ -66,7 +66,7 @@ __ https://mesonbuild.com/Commands.html#devenv
As shown above, you can use the ``--tests`` option as usual to operate on test
code. Note however that you cannot *build* or run tests via ``cargo``, because
they need support C code from QEMU that Cargo does not know about. Tests can
-be run via ``meson test`` or ``make``::
+be run via Meson (``pyvenv/bin/meson test``) or ``make``::
make check-rust
diff --git a/docs/system/devices/igb.rst b/docs/system/devices/igb.rst
index 71f31cb..50f625f 100644
--- a/docs/system/devices/igb.rst
+++ b/docs/system/devices/igb.rst
@@ -54,7 +54,7 @@ directory:
.. code-block:: shell
- meson test qtest-x86_64/qos-test
+ pyvenv/bin/meson test qtest-x86_64/qos-test
ethtool can test register accesses, interrupts, etc. It is automated as an
functional test and can be run from the build directory with the following
diff --git a/hw/core/register.c b/hw/core/register.c
index 8f63d9f..3340df7 100644
--- a/hw/core/register.c
+++ b/hw/core/register.c
@@ -314,7 +314,6 @@ RegisterInfoArray *register_init_block64(DeviceState *owner,
void register_finalize_block(RegisterInfoArray *r_array)
{
- object_unparent(OBJECT(&r_array->mem));
g_free(r_array->r);
g_free(r_array);
}
diff --git a/hw/hyperv/hv-balloon.c b/hw/hyperv/hv-balloon.c
index 6dbcb2d..2d6d7db 100644
--- a/hw/hyperv/hv-balloon.c
+++ b/hw/hyperv/hv-balloon.c
@@ -1475,16 +1475,6 @@ static void hv_balloon_ensure_mr(HvBalloon *balloon)
balloon->mr->align = memory_region_get_alignment(hostmem_mr);
}
-static void hv_balloon_free_mr(HvBalloon *balloon)
-{
- if (!balloon->mr) {
- return;
- }
-
- object_unparent(OBJECT(balloon->mr));
- g_clear_pointer(&balloon->mr, g_free);
-}
-
static void hv_balloon_vmdev_realize(VMBusDevice *vdev, Error **errp)
{
ERRP_GUARD();
@@ -1580,7 +1570,7 @@ static void hv_balloon_vmdev_reset(VMBusDevice *vdev)
*/
static void hv_balloon_unrealize_finalize_common(HvBalloon *balloon)
{
- hv_balloon_free_mr(balloon);
+ g_clear_pointer(&balloon->mr, g_free);
balloon->addr = 0;
balloon->memslot_count = 0;
diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c
index 4b4cf09..72e91f9 100644
--- a/hw/intc/arm_gicv3_cpuif.c
+++ b/hw/intc/arm_gicv3_cpuif.c
@@ -3037,15 +3037,7 @@ void gicv3_init_cpuif(GICv3State *s)
* cpu->gic_pribits
*/
- /* Note that we can't just use the GICv3CPUState as an opaque pointer
- * in define_arm_cp_regs_with_opaque(), because when we're called back
- * it might be with code translated by CPU 0 but run by CPU 1, in
- * which case we'd get the wrong value.
- * So instead we define the regs with no ri->opaque info, and
- * get back to the GICv3CPUState from the CPUARMState.
- *
- * These CP regs callbacks can be called from either TCG or HVF code.
- */
+ /* These CP regs callbacks can be called from either TCG or HVF. */
define_arm_cp_regs(cpu, gicv3_cpuif_reginfo);
/*
diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c
index 7e1c71e..a748a0b 100644
--- a/hw/ppc/spapr_pci_vfio.c
+++ b/hw/ppc/spapr_pci_vfio.c
@@ -24,7 +24,7 @@
#include "hw/pci-host/spapr.h"
#include "hw/pci/msix.h"
#include "hw/pci/pci_device.h"
-#include "hw/vfio/vfio-container.h"
+#include "hw/vfio/vfio-container-legacy.h"
#include "qemu/error-report.h"
#include CONFIG_DEVICES /* CONFIG_VFIO_PCI */
@@ -32,7 +32,7 @@
* Interfaces for IBM EEH (Enhanced Error Handling)
*/
#ifdef CONFIG_VFIO_PCI
-static bool vfio_eeh_container_ok(VFIOContainer *container)
+static bool vfio_eeh_container_ok(VFIOLegacyContainer *container)
{
/*
* As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO
@@ -60,7 +60,7 @@ static bool vfio_eeh_container_ok(VFIOContainer *container)
return true;
}
-static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op)
+static int vfio_eeh_container_op(VFIOLegacyContainer *container, uint32_t op)
{
struct vfio_eeh_pe_op pe_op = {
.argsz = sizeof(pe_op),
@@ -83,10 +83,10 @@ static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op)
return ret;
}
-static VFIOContainer *vfio_eeh_as_container(AddressSpace *as)
+static VFIOLegacyContainer *vfio_eeh_as_container(AddressSpace *as)
{
VFIOAddressSpace *space = vfio_address_space_get(as);
- VFIOContainerBase *bcontainer = NULL;
+ VFIOContainer *bcontainer = NULL;
if (QLIST_EMPTY(&space->containers)) {
/* No containers to act on */
@@ -111,14 +111,14 @@ out:
static bool vfio_eeh_as_ok(AddressSpace *as)
{
- VFIOContainer *container = vfio_eeh_as_container(as);
+ VFIOLegacyContainer *container = vfio_eeh_as_container(as);
return (container != NULL) && vfio_eeh_container_ok(container);
}
static int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
{
- VFIOContainer *container = vfio_eeh_as_container(as);
+ VFIOLegacyContainer *container = vfio_eeh_as_container(as);
if (!container) {
return -ENODEV;
diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c
index 938a551..9e31029 100644
--- a/hw/s390x/s390-pci-vfio.c
+++ b/hw/s390x/s390-pci-vfio.c
@@ -20,7 +20,7 @@
#include "hw/s390x/s390-pci-clp.h"
#include "hw/s390x/s390-pci-vfio.h"
#include "hw/vfio/pci.h"
-#include "hw/vfio/vfio-container.h"
+#include "hw/vfio/vfio-container-legacy.h"
#include "hw/vfio/vfio-helpers.h"
/*
@@ -62,7 +62,7 @@ S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
{
S390PCIDMACount *cnt;
uint32_t avail;
- VFIOPCIDevice *vpdev = VFIO_PCI_BASE(pbdev->pdev);
+ VFIOPCIDevice *vpdev = VFIO_PCI_DEVICE(pbdev->pdev);
int id;
assert(vpdev);
@@ -108,7 +108,7 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev,
{
struct vfio_info_cap_header *hdr;
struct vfio_device_info_cap_zpci_base *cap;
- VFIOPCIDevice *vpci = VFIO_PCI_BASE(pbdev->pdev);
+ VFIOPCIDevice *vpci = VFIO_PCI_DEVICE(pbdev->pdev);
uint64_t vfio_size;
hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_BASE);
@@ -162,7 +162,7 @@ static bool get_host_fh(S390PCIBusDevice *pbdev, struct vfio_device_info *info,
{
struct vfio_info_cap_header *hdr;
struct vfio_device_info_cap_zpci_base *cap;
- VFIOPCIDevice *vpci = VFIO_PCI_BASE(pbdev->pdev);
+ VFIOPCIDevice *vpci = VFIO_PCI_DEVICE(pbdev->pdev);
hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_BASE);
@@ -185,7 +185,7 @@ static void s390_pci_read_group(S390PCIBusDevice *pbdev,
struct vfio_device_info_cap_zpci_group *cap;
S390pciState *s = s390_get_phb();
ClpRspQueryPciGrp *resgrp;
- VFIOPCIDevice *vpci = VFIO_PCI_BASE(pbdev->pdev);
+ VFIOPCIDevice *vpci = VFIO_PCI_DEVICE(pbdev->pdev);
uint8_t start_gid = pbdev->zpci_fn.pfgid;
hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_GROUP);
@@ -264,7 +264,7 @@ static void s390_pci_read_util(S390PCIBusDevice *pbdev,
{
struct vfio_info_cap_header *hdr;
struct vfio_device_info_cap_zpci_util *cap;
- VFIOPCIDevice *vpci = VFIO_PCI_BASE(pbdev->pdev);
+ VFIOPCIDevice *vpci = VFIO_PCI_DEVICE(pbdev->pdev);
hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_UTIL);
@@ -291,7 +291,7 @@ static void s390_pci_read_pfip(S390PCIBusDevice *pbdev,
{
struct vfio_info_cap_header *hdr;
struct vfio_device_info_cap_zpci_pfip *cap;
- VFIOPCIDevice *vpci = VFIO_PCI_BASE(pbdev->pdev);
+ VFIOPCIDevice *vpci = VFIO_PCI_DEVICE(pbdev->pdev);
hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_PFIP);
@@ -314,7 +314,7 @@ static void s390_pci_read_pfip(S390PCIBusDevice *pbdev,
static struct vfio_device_info *get_device_info(S390PCIBusDevice *pbdev)
{
- VFIOPCIDevice *vfio_pci = VFIO_PCI_BASE(pbdev->pdev);
+ VFIOPCIDevice *vfio_pci = VFIO_PCI_DEVICE(pbdev->pdev);
return vfio_get_device_info(vfio_pci->vbasedev.fd);
}
diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c
index 3c897e5..89b595c 100644
--- a/hw/sd/sdhci.c
+++ b/hw/sd/sdhci.c
@@ -1578,10 +1578,6 @@ static void sdhci_sysbus_finalize(Object *obj)
{
SDHCIState *s = SYSBUS_SDHCI(obj);
- if (s->dma_mr) {
- object_unparent(OBJECT(s->dma_mr));
- }
-
sdhci_uninitfn(s);
}
diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index 4822c70..e207d05 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -735,6 +735,7 @@ static int uhci_handle_td(UHCIState *s, UHCIQueue *q, uint32_t qh_addr,
bool spd;
bool queuing = (q != NULL);
uint8_t pid = td->token & 0xff;
+ uint8_t ep_id = (td->token >> 15) & 0xf;
UHCIAsync *async;
async = uhci_async_find_td(s, td_addr);
@@ -778,9 +779,14 @@ static int uhci_handle_td(UHCIState *s, UHCIQueue *q, uint32_t qh_addr,
switch (pid) {
case USB_TOKEN_OUT:
- case USB_TOKEN_SETUP:
case USB_TOKEN_IN:
break;
+ case USB_TOKEN_SETUP:
+ /* SETUP is only valid to endpoint 0 */
+ if (ep_id == 0) {
+ break;
+ }
+ /* fallthrough */
default:
/* invalid pid : frame interrupted */
s->status |= UHCI_STS_HCPERR;
@@ -829,7 +835,7 @@ static int uhci_handle_td(UHCIState *s, UHCIQueue *q, uint32_t qh_addr,
return uhci_handle_td_error(s, td, td_addr, USB_RET_NODEV,
int_mask);
}
- ep = usb_ep_get(dev, pid, (td->token >> 15) & 0xf);
+ ep = usb_ep_get(dev, pid, ep_id);
q = uhci_queue_new(s, qh_addr, td, ep);
}
async = uhci_async_alloc(q, td_addr);
diff --git a/hw/vfio-user/container.c b/hw/vfio-user/container.c
index 3cdbd44..411eb7b 100644
--- a/hw/vfio-user/container.c
+++ b/hw/vfio-user/container.c
@@ -22,14 +22,14 @@
* will fire during memory update transactions. These depend on BQL being held,
* so do any resulting map/demap ops async while keeping BQL.
*/
-static void vfio_user_listener_begin(VFIOContainerBase *bcontainer)
+static void vfio_user_listener_begin(VFIOContainer *bcontainer)
{
VFIOUserContainer *container = VFIO_IOMMU_USER(bcontainer);
container->proxy->async_ops = true;
}
-static void vfio_user_listener_commit(VFIOContainerBase *bcontainer)
+static void vfio_user_listener_commit(VFIOContainer *bcontainer)
{
VFIOUserContainer *container = VFIO_IOMMU_USER(bcontainer);
@@ -38,7 +38,7 @@ static void vfio_user_listener_commit(VFIOContainerBase *bcontainer)
vfio_user_wait_reqs(container->proxy);
}
-static int vfio_user_dma_unmap(const VFIOContainerBase *bcontainer,
+static int vfio_user_dma_unmap(const VFIOContainer *bcontainer,
hwaddr iova, ram_addr_t size,
IOMMUTLBEntry *iotlb, bool unmap_all)
{
@@ -80,7 +80,7 @@ static int vfio_user_dma_unmap(const VFIOContainerBase *bcontainer,
return ret;
}
-static int vfio_user_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
+static int vfio_user_dma_map(const VFIOContainer *bcontainer, hwaddr iova,
ram_addr_t size, void *vaddr, bool readonly,
MemoryRegion *mrp)
{
@@ -154,14 +154,14 @@ static int vfio_user_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
}
static int
-vfio_user_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
+vfio_user_set_dirty_page_tracking(const VFIOContainer *bcontainer,
bool start, Error **errp)
{
error_setg_errno(errp, ENOTSUP, "Not supported");
return -ENOTSUP;
}
-static int vfio_user_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
+static int vfio_user_query_dirty_bitmap(const VFIOContainer *bcontainer,
VFIOBitmap *vbmap, hwaddr iova,
hwaddr size, Error **errp)
{
@@ -169,7 +169,7 @@ static int vfio_user_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
return -ENOTSUP;
}
-static bool vfio_user_setup(VFIOContainerBase *bcontainer, Error **errp)
+static bool vfio_user_setup(VFIOContainer *bcontainer, Error **errp)
{
VFIOUserContainer *container = VFIO_IOMMU_USER(bcontainer);
@@ -202,7 +202,7 @@ static VFIOUserContainer *
vfio_user_container_connect(AddressSpace *as, VFIODevice *vbasedev,
Error **errp)
{
- VFIOContainerBase *bcontainer;
+ VFIOContainer *bcontainer;
VFIOUserContainer *container;
VFIOAddressSpace *space;
VFIOIOMMUClass *vioc;
@@ -260,7 +260,7 @@ put_space_exit:
static void vfio_user_container_disconnect(VFIOUserContainer *container)
{
- VFIOContainerBase *bcontainer = VFIO_IOMMU(container);
+ VFIOContainer *bcontainer = VFIO_IOMMU(container);
VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
VFIOAddressSpace *space = bcontainer->space;
diff --git a/hw/vfio-user/container.h b/hw/vfio-user/container.h
index 96aa678..a2b42e3 100644
--- a/hw/vfio-user/container.h
+++ b/hw/vfio-user/container.h
@@ -9,12 +9,12 @@
#include "qemu/osdep.h"
-#include "hw/vfio/vfio-container-base.h"
+#include "hw/vfio/vfio-container.h"
#include "hw/vfio-user/proxy.h"
/* MMU container sub-class for vfio-user. */
struct VFIOUserContainer {
- VFIOContainerBase parent_obj;
+ VFIOContainer parent_obj;
VFIOUserProxy *proxy;
};
diff --git a/hw/vfio-user/pci.c b/hw/vfio-user/pci.c
index e2c3097..b53ed3b 100644
--- a/hw/vfio-user/pci.c
+++ b/hw/vfio-user/pci.c
@@ -234,9 +234,10 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
{
ERRP_GUARD();
VFIOUserPCIDevice *udev = VFIO_USER_PCI(pdev);
- VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
+ VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(pdev);
VFIODevice *vbasedev = &vdev->vbasedev;
const char *sock_name;
+
AddressSpace *as;
SocketAddress addr;
VFIOUserProxy *proxy;
@@ -343,10 +344,10 @@ error:
vfio_pci_put_device(vdev);
}
-static void vfio_user_instance_init(Object *obj)
+static void vfio_user_pci_init(Object *obj)
{
PCIDevice *pci_dev = PCI_DEVICE(obj);
- VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
+ VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(obj);
VFIODevice *vbasedev = &vdev->vbasedev;
device_add_bootindex_property(obj, &vdev->bootindex,
@@ -369,9 +370,9 @@ static void vfio_user_instance_init(Object *obj)
pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
}
-static void vfio_user_instance_finalize(Object *obj)
+static void vfio_user_pci_finalize(Object *obj)
{
- VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
+ VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(obj);
VFIODevice *vbasedev = &vdev->vbasedev;
if (vdev->msix != NULL) {
@@ -387,7 +388,7 @@ static void vfio_user_instance_finalize(Object *obj)
static void vfio_user_pci_reset(DeviceState *dev)
{
- VFIOPCIDevice *vdev = VFIO_PCI_BASE(dev);
+ VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(dev);
VFIODevice *vbasedev = &vdev->vbasedev;
vfio_pci_pre_reset(vdev);
@@ -399,7 +400,7 @@ static void vfio_user_pci_reset(DeviceState *dev)
vfio_pci_post_reset(vdev);
}
-static const Property vfio_user_pci_dev_properties[] = {
+static const Property vfio_user_pci_properties[] = {
DEFINE_PROP_UINT32("x-pci-vendor-id", VFIOPCIDevice,
vendor_id, PCI_ANY_ID),
DEFINE_PROP_UINT32("x-pci-device-id", VFIOPCIDevice,
@@ -421,7 +422,7 @@ static void vfio_user_pci_set_socket(Object *obj, Visitor *v, const char *name,
VFIOUserPCIDevice *udev = VFIO_USER_PCI(obj);
bool success;
- if (VFIO_PCI_BASE(udev)->vbasedev.proxy) {
+ if (VFIO_PCI_DEVICE(udev)->vbasedev.proxy) {
error_setg(errp, "Proxy is connected");
return;
}
@@ -445,13 +446,13 @@ static void vfio_user_pci_set_socket(Object *obj, Visitor *v, const char *name,
}
}
-static void vfio_user_pci_dev_class_init(ObjectClass *klass, const void *data)
+static void vfio_user_pci_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass);
device_class_set_legacy_reset(dc, vfio_user_pci_reset);
- device_class_set_props(dc, vfio_user_pci_dev_properties);
+ device_class_set_props(dc, vfio_user_pci_properties);
object_class_property_add(klass, "socket", "SocketAddress", NULL,
vfio_user_pci_set_socket, NULL, NULL);
@@ -462,18 +463,18 @@ static void vfio_user_pci_dev_class_init(ObjectClass *klass, const void *data)
pdc->realize = vfio_user_pci_realize;
}
-static const TypeInfo vfio_user_pci_dev_info = {
+static const TypeInfo vfio_user_pci_info = {
.name = TYPE_VFIO_USER_PCI,
- .parent = TYPE_VFIO_PCI_BASE,
+ .parent = TYPE_VFIO_PCI_DEVICE,
.instance_size = sizeof(VFIOUserPCIDevice),
- .class_init = vfio_user_pci_dev_class_init,
- .instance_init = vfio_user_instance_init,
- .instance_finalize = vfio_user_instance_finalize,
+ .class_init = vfio_user_pci_class_init,
+ .instance_init = vfio_user_pci_init,
+ .instance_finalize = vfio_user_pci_finalize,
};
static void register_vfio_user_dev_type(void)
{
- type_register_static(&vfio_user_pci_dev_info);
+ type_register_static(&vfio_user_pci_info);
}
- type_init(register_vfio_user_dev_type)
+type_init(register_vfio_user_dev_type)
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
deleted file mode 100644
index 5630497..0000000
--- a/hw/vfio/container-base.c
+++ /dev/null
@@ -1,347 +0,0 @@
-/*
- * VFIO BASE CONTAINER
- *
- * Copyright (C) 2023 Intel Corporation.
- * Copyright Red Hat, Inc. 2023
- *
- * Authors: Yi Liu <yi.l.liu@intel.com>
- * Eric Auger <eric.auger@redhat.com>
- *
- * SPDX-License-Identifier: GPL-2.0-or-later
- */
-
-#include <sys/ioctl.h>
-#include <linux/vfio.h>
-
-#include "qemu/osdep.h"
-#include "system/tcg.h"
-#include "system/ram_addr.h"
-#include "qapi/error.h"
-#include "qemu/error-report.h"
-#include "hw/vfio/vfio-container-base.h"
-#include "hw/vfio/vfio-device.h" /* vfio_device_reset_handler */
-#include "system/reset.h"
-#include "vfio-helpers.h"
-
-#include "trace.h"
-
-static QLIST_HEAD(, VFIOAddressSpace) vfio_address_spaces =
- QLIST_HEAD_INITIALIZER(vfio_address_spaces);
-
-VFIOAddressSpace *vfio_address_space_get(AddressSpace *as)
-{
- VFIOAddressSpace *space;
-
- QLIST_FOREACH(space, &vfio_address_spaces, list) {
- if (space->as == as) {
- return space;
- }
- }
-
- /* No suitable VFIOAddressSpace, create a new one */
- space = g_malloc0(sizeof(*space));
- space->as = as;
- QLIST_INIT(&space->containers);
-
- if (QLIST_EMPTY(&vfio_address_spaces)) {
- qemu_register_reset(vfio_device_reset_handler, NULL);
- }
-
- QLIST_INSERT_HEAD(&vfio_address_spaces, space, list);
-
- return space;
-}
-
-void vfio_address_space_put(VFIOAddressSpace *space)
-{
- if (!QLIST_EMPTY(&space->containers)) {
- return;
- }
-
- QLIST_REMOVE(space, list);
- g_free(space);
-
- if (QLIST_EMPTY(&vfio_address_spaces)) {
- qemu_unregister_reset(vfio_device_reset_handler, NULL);
- }
-}
-
-void vfio_address_space_insert(VFIOAddressSpace *space,
- VFIOContainerBase *bcontainer)
-{
- QLIST_INSERT_HEAD(&space->containers, bcontainer, next);
- bcontainer->space = space;
-}
-
-int vfio_container_dma_map(VFIOContainerBase *bcontainer,
- hwaddr iova, ram_addr_t size,
- void *vaddr, bool readonly, MemoryRegion *mr)
-{
- VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
- RAMBlock *rb = mr->ram_block;
- int mfd = rb ? qemu_ram_get_fd(rb) : -1;
-
- if (mfd >= 0 && vioc->dma_map_file) {
- unsigned long start = vaddr - qemu_ram_get_host_addr(rb);
- unsigned long offset = qemu_ram_get_fd_offset(rb);
-
- return vioc->dma_map_file(bcontainer, iova, size, mfd, start + offset,
- readonly);
- }
- g_assert(vioc->dma_map);
- return vioc->dma_map(bcontainer, iova, size, vaddr, readonly, mr);
-}
-
-int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
- hwaddr iova, ram_addr_t size,
- IOMMUTLBEntry *iotlb, bool unmap_all)
-{
- VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
-
- g_assert(vioc->dma_unmap);
- return vioc->dma_unmap(bcontainer, iova, size, iotlb, unmap_all);
-}
-
-bool vfio_container_add_section_window(VFIOContainerBase *bcontainer,
- MemoryRegionSection *section,
- Error **errp)
-{
- VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
-
- if (!vioc->add_window) {
- return true;
- }
-
- return vioc->add_window(bcontainer, section, errp);
-}
-
-void vfio_container_del_section_window(VFIOContainerBase *bcontainer,
- MemoryRegionSection *section)
-{
- VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
-
- if (!vioc->del_window) {
- return;
- }
-
- return vioc->del_window(bcontainer, section);
-}
-
-int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer,
- bool start, Error **errp)
-{
- VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
- int ret;
-
- if (!bcontainer->dirty_pages_supported) {
- return 0;
- }
-
- g_assert(vioc->set_dirty_page_tracking);
- if (bcontainer->dirty_pages_started == start) {
- return 0;
- }
-
- ret = vioc->set_dirty_page_tracking(bcontainer, start, errp);
- if (!ret) {
- bcontainer->dirty_pages_started = start;
- }
-
- return ret;
-}
-
-static bool vfio_container_devices_dirty_tracking_is_started(
- const VFIOContainerBase *bcontainer)
-{
- VFIODevice *vbasedev;
-
- QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
- if (!vbasedev->dirty_tracking) {
- return false;
- }
- }
-
- return true;
-}
-
-bool vfio_container_dirty_tracking_is_started(
- const VFIOContainerBase *bcontainer)
-{
- return vfio_container_devices_dirty_tracking_is_started(bcontainer) ||
- bcontainer->dirty_pages_started;
-}
-
-bool vfio_container_devices_dirty_tracking_is_supported(
- const VFIOContainerBase *bcontainer)
-{
- VFIODevice *vbasedev;
-
- QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
- if (vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) {
- return false;
- }
- if (!vbasedev->dirty_pages_supported) {
- return false;
- }
- }
-
- return true;
-}
-
-static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova,
- hwaddr size, void *bitmap)
-{
- uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
- sizeof(struct vfio_device_feature_dma_logging_report),
- sizeof(uint64_t))] = {};
- struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
- struct vfio_device_feature_dma_logging_report *report =
- (struct vfio_device_feature_dma_logging_report *)feature->data;
-
- report->iova = iova;
- report->length = size;
- report->page_size = qemu_real_host_page_size();
- report->bitmap = (uintptr_t)bitmap;
-
- feature->argsz = sizeof(buf);
- feature->flags = VFIO_DEVICE_FEATURE_GET |
- VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT;
-
- return vbasedev->io_ops->device_feature(vbasedev, feature);
-}
-
-static int vfio_container_iommu_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
- VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp)
-{
- VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
-
- g_assert(vioc->query_dirty_bitmap);
- return vioc->query_dirty_bitmap(bcontainer, vbmap, iova, size,
- errp);
-}
-
-static int vfio_container_devices_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
- VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp)
-{
- VFIODevice *vbasedev;
- int ret;
-
- QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
- ret = vfio_device_dma_logging_report(vbasedev, iova, size,
- vbmap->bitmap);
- if (ret) {
- error_setg_errno(errp, -ret,
- "%s: Failed to get DMA logging report, iova: "
- "0x%" HWADDR_PRIx ", size: 0x%" HWADDR_PRIx,
- vbasedev->name, iova, size);
-
- return ret;
- }
- }
-
- return 0;
-}
-
-int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova,
- uint64_t size, ram_addr_t ram_addr, Error **errp)
-{
- bool all_device_dirty_tracking =
- vfio_container_devices_dirty_tracking_is_supported(bcontainer);
- uint64_t dirty_pages;
- VFIOBitmap vbmap;
- int ret;
-
- if (!bcontainer->dirty_pages_supported && !all_device_dirty_tracking) {
- cpu_physical_memory_set_dirty_range(ram_addr, size,
- tcg_enabled() ? DIRTY_CLIENTS_ALL :
- DIRTY_CLIENTS_NOCODE);
- return 0;
- }
-
- ret = vfio_bitmap_alloc(&vbmap, size);
- if (ret) {
- error_setg_errno(errp, -ret,
- "Failed to allocate dirty tracking bitmap");
- return ret;
- }
-
- if (all_device_dirty_tracking) {
- ret = vfio_container_devices_query_dirty_bitmap(bcontainer, &vbmap, iova, size,
- errp);
- } else {
- ret = vfio_container_iommu_query_dirty_bitmap(bcontainer, &vbmap, iova, size,
- errp);
- }
-
- if (ret) {
- goto out;
- }
-
- dirty_pages = cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap, ram_addr,
- vbmap.pages);
-
- trace_vfio_container_query_dirty_bitmap(iova, size, vbmap.size, ram_addr,
- dirty_pages);
-out:
- g_free(vbmap.bitmap);
-
- return ret;
-}
-
-static gpointer copy_iova_range(gconstpointer src, gpointer data)
-{
- Range *source = (Range *)src;
- Range *dest = g_new(Range, 1);
-
- range_set_bounds(dest, range_lob(source), range_upb(source));
- return dest;
-}
-
-GList *vfio_container_get_iova_ranges(const VFIOContainerBase *bcontainer)
-{
- assert(bcontainer);
- return g_list_copy_deep(bcontainer->iova_ranges, copy_iova_range, NULL);
-}
-
-static void vfio_container_instance_finalize(Object *obj)
-{
- VFIOContainerBase *bcontainer = VFIO_IOMMU(obj);
- VFIOGuestIOMMU *giommu, *tmp;
-
- QLIST_SAFE_REMOVE(bcontainer, next);
-
- QLIST_FOREACH_SAFE(giommu, &bcontainer->giommu_list, giommu_next, tmp) {
- memory_region_unregister_iommu_notifier(
- MEMORY_REGION(giommu->iommu_mr), &giommu->n);
- QLIST_REMOVE(giommu, giommu_next);
- g_free(giommu);
- }
-
- g_list_free_full(bcontainer->iova_ranges, g_free);
-}
-
-static void vfio_container_instance_init(Object *obj)
-{
- VFIOContainerBase *bcontainer = VFIO_IOMMU(obj);
-
- bcontainer->error = NULL;
- bcontainer->dirty_pages_supported = false;
- bcontainer->dma_max_mappings = 0;
- bcontainer->iova_ranges = NULL;
- QLIST_INIT(&bcontainer->giommu_list);
- QLIST_INIT(&bcontainer->vrdl_list);
-}
-
-static const TypeInfo types[] = {
- {
- .name = TYPE_VFIO_IOMMU,
- .parent = TYPE_OBJECT,
- .instance_init = vfio_container_instance_init,
- .instance_finalize = vfio_container_instance_finalize,
- .instance_size = sizeof(VFIOContainerBase),
- .class_size = sizeof(VFIOIOMMUClass),
- .abstract = true,
- },
-};
-
-DEFINE_TYPES(types)
diff --git a/hw/vfio/container-legacy.c b/hw/vfio/container-legacy.c
new file mode 100644
index 0000000..c0f87f7
--- /dev/null
+++ b/hw/vfio/container-legacy.c
@@ -0,0 +1,1277 @@
+/*
+ * generic functions used by VFIO devices
+ *
+ * Copyright Red Hat, Inc. 2012
+ *
+ * Authors:
+ * Alex Williamson <alex.williamson@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Based on qemu-kvm device-assignment:
+ * Adapted for KVM by Qumranet.
+ * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com)
+ * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com)
+ * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com)
+ * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com)
+ * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
+ */
+
+#include "qemu/osdep.h"
+#include <sys/ioctl.h>
+#include <linux/vfio.h>
+
+#include "hw/vfio/vfio-device.h"
+#include "system/address-spaces.h"
+#include "system/memory.h"
+#include "system/ram_addr.h"
+#include "qemu/error-report.h"
+#include "qemu/range.h"
+#include "system/reset.h"
+#include "trace.h"
+#include "qapi/error.h"
+#include "migration/cpr.h"
+#include "migration/blocker.h"
+#include "pci.h"
+#include "hw/vfio/vfio-container-legacy.h"
+#include "vfio-helpers.h"
+#include "vfio-listener.h"
+
+#define TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO TYPE_HOST_IOMMU_DEVICE "-legacy-vfio"
+
+typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList;
+static VFIOGroupList vfio_group_list =
+ QLIST_HEAD_INITIALIZER(vfio_group_list);
+
+static int vfio_ram_block_discard_disable(VFIOLegacyContainer *container,
+ bool state)
+{
+ switch (container->iommu_type) {
+ case VFIO_TYPE1v2_IOMMU:
+ case VFIO_TYPE1_IOMMU:
+ /*
+ * We support coordinated discarding of RAM via the RamDiscardManager.
+ */
+ return ram_block_uncoordinated_discard_disable(state);
+ default:
+ /*
+ * VFIO_SPAPR_TCE_IOMMU most probably works just fine with
+ * RamDiscardManager, however, it is completely untested.
+ *
+ * VFIO_SPAPR_TCE_v2_IOMMU with "DMA memory preregistering" does
+ * completely the opposite of managing mapping/pinning dynamically as
+ * required by RamDiscardManager. We would have to special-case sections
+ * with a RamDiscardManager.
+ */
+ return ram_block_discard_disable(state);
+ }
+}
+
+static int vfio_dma_unmap_bitmap(const VFIOLegacyContainer *container,
+ hwaddr iova, ram_addr_t size,
+ IOMMUTLBEntry *iotlb)
+{
+ const VFIOContainer *bcontainer = VFIO_IOMMU(container);
+ struct vfio_iommu_type1_dma_unmap *unmap;
+ struct vfio_bitmap *bitmap;
+ VFIOBitmap vbmap;
+ int ret;
+
+ ret = vfio_bitmap_alloc(&vbmap, size);
+ if (ret) {
+ return ret;
+ }
+
+ unmap = g_malloc0(sizeof(*unmap) + sizeof(*bitmap));
+
+ unmap->argsz = sizeof(*unmap) + sizeof(*bitmap);
+ unmap->iova = iova;
+ unmap->size = size;
+ unmap->flags |= VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP;
+ bitmap = (struct vfio_bitmap *)&unmap->data;
+
+ /*
+ * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of
+ * qemu_real_host_page_size to mark those dirty. Hence set bitmap_pgsize
+ * to qemu_real_host_page_size.
+ */
+ bitmap->pgsize = qemu_real_host_page_size();
+ bitmap->size = vbmap.size;
+ bitmap->data = (__u64 *)vbmap.bitmap;
+
+ if (vbmap.size > bcontainer->max_dirty_bitmap_size) {
+ error_report("UNMAP: Size of bitmap too big 0x%"PRIx64, vbmap.size);
+ ret = -E2BIG;
+ goto unmap_exit;
+ }
+
+ ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
+ if (!ret) {
+ cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap,
+ iotlb->translated_addr, vbmap.pages);
+ } else {
+ error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %m");
+ }
+
+unmap_exit:
+ g_free(unmap);
+ g_free(vbmap.bitmap);
+
+ return ret;
+}
+
+static int vfio_legacy_dma_unmap_one(const VFIOContainer *bcontainer,
+ hwaddr iova, ram_addr_t size,
+ IOMMUTLBEntry *iotlb)
+{
+ const VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
+ struct vfio_iommu_type1_dma_unmap unmap = {
+ .argsz = sizeof(unmap),
+ .flags = 0,
+ .iova = iova,
+ .size = size,
+ };
+ bool need_dirty_sync = false;
+ int ret;
+ Error *local_err = NULL;
+
+ g_assert(!cpr_is_incoming());
+
+ if (iotlb && vfio_container_dirty_tracking_is_started(bcontainer)) {
+ if (!vfio_container_devices_dirty_tracking_is_supported(bcontainer) &&
+ bcontainer->dirty_pages_supported) {
+ return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
+ }
+
+ need_dirty_sync = true;
+ }
+
+ while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
+ /*
+ * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c
+ * v4.15) where an overflow in its wrap-around check prevents us from
+ * unmapping the last page of the address space. Test for the error
+ * condition and re-try the unmap excluding the last page. The
+ * expectation is that we've never mapped the last page anyway and this
+ * unmap request comes via vIOMMU support which also makes it unlikely
+ * that this page is used. This bug was introduced well after type1 v2
+ * support was introduced, so we shouldn't need to test for v1. A fix
+ * is queued for kernel v5.0 so this workaround can be removed once
+ * affected kernels are sufficiently deprecated.
+ */
+ if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) &&
+ container->iommu_type == VFIO_TYPE1v2_IOMMU) {
+ trace_vfio_legacy_dma_unmap_overflow_workaround();
+ unmap.size -= 1ULL << ctz64(bcontainer->pgsizes);
+ continue;
+ }
+ return -errno;
+ }
+
+ if (need_dirty_sync) {
+ ret = vfio_container_query_dirty_bitmap(bcontainer, iova, size,
+ iotlb->translated_addr, &local_err);
+ if (ret) {
+ error_report_err(local_err);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
+ */
+static int vfio_legacy_dma_unmap(const VFIOContainer *bcontainer,
+ hwaddr iova, ram_addr_t size,
+ IOMMUTLBEntry *iotlb, bool unmap_all)
+{
+ int ret;
+
+ if (unmap_all) {
+ /* The unmap ioctl doesn't accept a full 64-bit span. */
+ Int128 llsize = int128_rshift(int128_2_64(), 1);
+
+ ret = vfio_legacy_dma_unmap_one(bcontainer, 0, int128_get64(llsize),
+ iotlb);
+
+ if (ret == 0) {
+ ret = vfio_legacy_dma_unmap_one(bcontainer, int128_get64(llsize),
+ int128_get64(llsize), iotlb);
+ }
+
+ } else {
+ ret = vfio_legacy_dma_unmap_one(bcontainer, iova, size, iotlb);
+ }
+
+ return ret;
+}
+
+static int vfio_legacy_dma_map(const VFIOContainer *bcontainer, hwaddr iova,
+ ram_addr_t size, void *vaddr, bool readonly,
+ MemoryRegion *mr)
+{
+ const VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
+ struct vfio_iommu_type1_dma_map map = {
+ .argsz = sizeof(map),
+ .flags = VFIO_DMA_MAP_FLAG_READ,
+ .vaddr = (__u64)(uintptr_t)vaddr,
+ .iova = iova,
+ .size = size,
+ };
+
+ if (!readonly) {
+ map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
+ }
+
+ /*
+ * Try the mapping, if it fails with EBUSY, unmap the region and try
+ * again. This shouldn't be necessary, but we sometimes see it in
+ * the VGA ROM space.
+ */
+ if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 ||
+ (errno == EBUSY &&
+ vfio_legacy_dma_unmap(bcontainer, iova, size, NULL, false) == 0 &&
+ ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
+ return 0;
+ }
+
+ return -errno;
+}
+
+static int
+vfio_legacy_set_dirty_page_tracking(const VFIOContainer *bcontainer,
+ bool start, Error **errp)
+{
+ const VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
+ int ret;
+ struct vfio_iommu_type1_dirty_bitmap dirty = {
+ .argsz = sizeof(dirty),
+ };
+
+ if (start) {
+ dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START;
+ } else {
+ dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP;
+ }
+
+ ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty);
+ if (ret) {
+ ret = -errno;
+ error_setg_errno(errp, errno, "Failed to set dirty tracking flag 0x%x",
+ dirty.flags);
+ }
+
+ return ret;
+}
+
+static int vfio_legacy_query_dirty_bitmap(const VFIOContainer *bcontainer,
+ VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp)
+{
+ const VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
+ struct vfio_iommu_type1_dirty_bitmap *dbitmap;
+ struct vfio_iommu_type1_dirty_bitmap_get *range;
+ int ret;
+
+ dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range));
+
+ dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range);
+ dbitmap->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP;
+ range = (struct vfio_iommu_type1_dirty_bitmap_get *)&dbitmap->data;
+ range->iova = iova;
+ range->size = size;
+
+ /*
+ * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of
+ * qemu_real_host_page_size to mark those dirty. Hence set bitmap's pgsize
+ * to qemu_real_host_page_size.
+ */
+ range->bitmap.pgsize = qemu_real_host_page_size();
+ range->bitmap.size = vbmap->size;
+ range->bitmap.data = (__u64 *)vbmap->bitmap;
+
+ ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap);
+ if (ret) {
+ ret = -errno;
+ error_setg_errno(errp, errno,
+ "Failed to get dirty bitmap for iova: 0x%"PRIx64
+ " size: 0x%"PRIx64, (uint64_t)range->iova,
+ (uint64_t)range->size);
+ }
+
+ g_free(dbitmap);
+
+ return ret;
+}
+
+static bool vfio_get_info_iova_range(struct vfio_iommu_type1_info *info,
+ VFIOContainer *bcontainer)
+{
+ struct vfio_info_cap_header *hdr;
+ struct vfio_iommu_type1_info_cap_iova_range *cap;
+
+ hdr = vfio_get_iommu_type1_info_cap(info,
+ VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
+ if (!hdr) {
+ return false;
+ }
+
+ cap = (void *)hdr;
+
+ for (int i = 0; i < cap->nr_iovas; i++) {
+ Range *range = g_new(Range, 1);
+
+ range_set_bounds(range, cap->iova_ranges[i].start,
+ cap->iova_ranges[i].end);
+ bcontainer->iova_ranges =
+ range_list_insert(bcontainer->iova_ranges, range);
+ }
+
+ return true;
+}
+
+static void vfio_group_add_kvm_device(VFIOGroup *group)
+{
+ Error *err = NULL;
+
+ if (vfio_kvm_device_add_fd(group->fd, &err)) {
+ error_reportf_err(err, "group ID %d: ", group->groupid);
+ }
+}
+
+static void vfio_group_del_kvm_device(VFIOGroup *group)
+{
+ Error *err = NULL;
+
+ if (vfio_kvm_device_del_fd(group->fd, &err)) {
+ error_reportf_err(err, "group ID %d: ", group->groupid);
+ }
+}
+
+/*
+ * vfio_get_iommu_type - selects the richest iommu_type (v2 first)
+ */
+static int vfio_get_iommu_type(int container_fd,
+ Error **errp)
+{
+ int iommu_types[] = { VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU,
+ VFIO_SPAPR_TCE_v2_IOMMU, VFIO_SPAPR_TCE_IOMMU };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(iommu_types); i++) {
+ if (ioctl(container_fd, VFIO_CHECK_EXTENSION, iommu_types[i])) {
+ return iommu_types[i];
+ }
+ }
+ error_setg(errp, "No available IOMMU models");
+ return -EINVAL;
+}
+
+/*
+ * vfio_get_iommu_ops - get a VFIOIOMMUClass associated with a type
+ */
+static const char *vfio_get_iommu_class_name(int iommu_type)
+{
+ switch (iommu_type) {
+ case VFIO_TYPE1v2_IOMMU:
+ case VFIO_TYPE1_IOMMU:
+ return TYPE_VFIO_IOMMU_LEGACY;
+ break;
+ case VFIO_SPAPR_TCE_v2_IOMMU:
+ case VFIO_SPAPR_TCE_IOMMU:
+ return TYPE_VFIO_IOMMU_SPAPR;
+ break;
+ default:
+ g_assert_not_reached();
+ };
+}
+
+static bool vfio_set_iommu(int container_fd, int group_fd,
+ int *iommu_type, Error **errp)
+{
+ if (ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &container_fd)) {
+ error_setg_errno(errp, errno, "Failed to set group container");
+ return false;
+ }
+
+ while (ioctl(container_fd, VFIO_SET_IOMMU, *iommu_type)) {
+ if (*iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
+ /*
+ * On sPAPR, despite the IOMMU subdriver always advertises v1 and
+ * v2, the running platform may not support v2 and there is no
+ * way to guess it until an IOMMU group gets added to the container.
+ * So in case it fails with v2, try v1 as a fallback.
+ */
+ *iommu_type = VFIO_SPAPR_TCE_IOMMU;
+ continue;
+ }
+ error_setg_errno(errp, errno, "Failed to set iommu for container");
+ return false;
+ }
+
+ return true;
+}
+
+static VFIOLegacyContainer *vfio_create_container(int fd, VFIOGroup *group,
+ Error **errp)
+{
+ int iommu_type;
+ const char *vioc_name;
+ VFIOLegacyContainer *container;
+
+ iommu_type = vfio_get_iommu_type(fd, errp);
+ if (iommu_type < 0) {
+ return NULL;
+ }
+
+ /*
+ * During CPR, just set the container type and skip the ioctls, as the
+ * container and group are already configured in the kernel.
+ */
+ if (!cpr_is_incoming() &&
+ !vfio_set_iommu(fd, group->fd, &iommu_type, errp)) {
+ return NULL;
+ }
+
+ vioc_name = vfio_get_iommu_class_name(iommu_type);
+
+ container = VFIO_IOMMU_LEGACY(object_new(vioc_name));
+ container->fd = fd;
+ container->iommu_type = iommu_type;
+ return container;
+}
+
+static int vfio_get_iommu_info(VFIOLegacyContainer *container,
+ struct vfio_iommu_type1_info **info)
+{
+
+ size_t argsz = sizeof(struct vfio_iommu_type1_info);
+
+ *info = g_new0(struct vfio_iommu_type1_info, 1);
+again:
+ (*info)->argsz = argsz;
+
+ if (ioctl(container->fd, VFIO_IOMMU_GET_INFO, *info)) {
+ g_free(*info);
+ *info = NULL;
+ return -errno;
+ }
+
+ if (((*info)->argsz > argsz)) {
+ argsz = (*info)->argsz;
+ *info = g_realloc(*info, argsz);
+ goto again;
+ }
+
+ return 0;
+}
+
+static struct vfio_info_cap_header *
+vfio_get_iommu_info_cap(struct vfio_iommu_type1_info *info, uint16_t id)
+{
+ struct vfio_info_cap_header *hdr;
+ void *ptr = info;
+
+ if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) {
+ return NULL;
+ }
+
+ for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) {
+ if (hdr->id == id) {
+ return hdr;
+ }
+ }
+
+ return NULL;
+}
+
+static void vfio_get_iommu_info_migration(VFIOLegacyContainer *container,
+ struct vfio_iommu_type1_info *info)
+{
+ struct vfio_info_cap_header *hdr;
+ struct vfio_iommu_type1_info_cap_migration *cap_mig;
+ VFIOContainer *bcontainer = VFIO_IOMMU(container);
+
+ hdr = vfio_get_iommu_info_cap(info, VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION);
+ if (!hdr) {
+ return;
+ }
+
+ cap_mig = container_of(hdr, struct vfio_iommu_type1_info_cap_migration,
+ header);
+
+ /*
+ * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of
+ * qemu_real_host_page_size to mark those dirty.
+ */
+ if (cap_mig->pgsize_bitmap & qemu_real_host_page_size()) {
+ bcontainer->dirty_pages_supported = true;
+ bcontainer->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size;
+ bcontainer->dirty_pgsizes = cap_mig->pgsize_bitmap;
+ }
+}
+
+static bool vfio_legacy_setup(VFIOContainer *bcontainer, Error **errp)
+{
+ VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
+ g_autofree struct vfio_iommu_type1_info *info = NULL;
+ int ret;
+
+ ret = vfio_get_iommu_info(container, &info);
+ if (ret) {
+ error_setg_errno(errp, -ret, "Failed to get VFIO IOMMU info");
+ return false;
+ }
+
+ if (info->flags & VFIO_IOMMU_INFO_PGSIZES) {
+ bcontainer->pgsizes = info->iova_pgsizes;
+ } else {
+ bcontainer->pgsizes = qemu_real_host_page_size();
+ }
+
+ if (!vfio_get_info_dma_avail(info, &bcontainer->dma_max_mappings)) {
+ bcontainer->dma_max_mappings = 65535;
+ }
+
+ vfio_get_info_iova_range(info, bcontainer);
+
+ vfio_get_iommu_info_migration(container, info);
+ return true;
+}
+
+static bool vfio_container_attach_discard_disable(
+ VFIOLegacyContainer *container, VFIOGroup *group, Error **errp)
+{
+ int ret;
+
+ /*
+ * VFIO is currently incompatible with discarding of RAM insofar as the
+ * madvise to purge (zap) the page from QEMU's address space does not
+ * interact with the memory API and therefore leaves stale virtual to
+ * physical mappings in the IOMMU if the page was previously pinned. We
+ * therefore set discarding broken for each group added to a container,
+ * whether the container is used individually or shared. This provides
+ * us with options to allow devices within a group to opt-in and allow
+ * discarding, so long as it is done consistently for a group (for instance
+ * if the device is an mdev device where it is known that the host vendor
+ * driver will never pin pages outside of the working set of the guest
+ * driver, which would thus not be discarding candidates).
+ *
+ * The first opportunity to induce pinning occurs here where we attempt to
+ * attach the group to existing containers within the AddressSpace. If any
+ * pages are already zapped from the virtual address space, such as from
+ * previous discards, new pinning will cause valid mappings to be
+ * re-established. Likewise, when the overall MemoryListener for a new
+ * container is registered, a replay of mappings within the AddressSpace
+ * will occur, re-establishing any previously zapped pages as well.
+ *
+ * Especially virtio-balloon is currently only prevented from discarding
+ * new memory, it will not yet set ram_block_discard_set_required() and
+ * therefore, neither stops us here or deals with the sudden memory
+ * consumption of inflated memory.
+ *
+ * We do support discarding of memory coordinated via the RamDiscardManager
+ * with some IOMMU types. vfio_ram_block_discard_disable() handles the
+ * details once we know which type of IOMMU we are using.
+ */
+
+ ret = vfio_ram_block_discard_disable(container, true);
+ if (ret) {
+ error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken");
+ if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) {
+ error_report("vfio: error disconnecting group %d from"
+ " container", group->groupid);
+ }
+ }
+ return !ret;
+}
+
+static bool vfio_container_group_add(VFIOLegacyContainer *container,
+ VFIOGroup *group, Error **errp)
+{
+ if (!vfio_container_attach_discard_disable(container, group, errp)) {
+ return false;
+ }
+ group->container = container;
+ QLIST_INSERT_HEAD(&container->group_list, group, container_next);
+ vfio_group_add_kvm_device(group);
+ /*
+ * Remember the container fd for each group, so we can attach to the same
+ * container after CPR.
+ */
+ cpr_resave_fd("vfio_container_for_group", group->groupid, container->fd);
+ return true;
+}
+
+static void vfio_container_group_del(VFIOLegacyContainer *container,
+ VFIOGroup *group)
+{
+ QLIST_REMOVE(group, container_next);
+ group->container = NULL;
+ vfio_group_del_kvm_device(group);
+ vfio_ram_block_discard_disable(container, false);
+ cpr_delete_fd("vfio_container_for_group", group->groupid);
+}
+
+static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as,
+ Error **errp)
+{
+ VFIOLegacyContainer *container;
+ VFIOContainer *bcontainer;
+ int ret, fd = -1;
+ VFIOAddressSpace *space;
+ VFIOIOMMUClass *vioc = NULL;
+ bool new_container = false;
+ bool group_was_added = false;
+
+ space = vfio_address_space_get(as);
+ fd = cpr_find_fd("vfio_container_for_group", group->groupid);
+
+ if (!cpr_is_incoming()) {
+ QLIST_FOREACH(bcontainer, &space->containers, next) {
+ container = VFIO_IOMMU_LEGACY(bcontainer);
+ if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
+ return vfio_container_group_add(container, group, errp);
+ }
+ }
+
+ fd = qemu_open("/dev/vfio/vfio", O_RDWR, errp);
+ if (fd < 0) {
+ goto fail;
+ }
+ } else {
+ /*
+ * For incoming CPR, the group is already attached in the kernel.
+ * If a container with matching fd is found, then update the
+ * userland group list and return. If not, then after the loop,
+ * create the container struct and group list.
+ */
+ QLIST_FOREACH(bcontainer, &space->containers, next) {
+ container = VFIO_IOMMU_LEGACY(bcontainer);
+
+ if (vfio_cpr_container_match(container, group, fd)) {
+ return vfio_container_group_add(container, group, errp);
+ }
+ }
+ }
+
+ ret = ioctl(fd, VFIO_GET_API_VERSION);
+ if (ret != VFIO_API_VERSION) {
+ error_setg(errp, "supported vfio version: %d, "
+ "reported version: %d", VFIO_API_VERSION, ret);
+ goto fail;
+ }
+
+ container = vfio_create_container(fd, group, errp);
+ if (!container) {
+ goto fail;
+ }
+ new_container = true;
+ bcontainer = VFIO_IOMMU(container);
+
+ if (!vfio_legacy_cpr_register_container(container, errp)) {
+ goto fail;
+ }
+
+ vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+ assert(vioc->setup);
+
+ if (!vioc->setup(bcontainer, errp)) {
+ goto fail;
+ }
+
+ vfio_address_space_insert(space, bcontainer);
+
+ if (!vfio_container_group_add(container, group, errp)) {
+ goto fail;
+ }
+ group_was_added = true;
+
+ /*
+ * If CPR, register the listener later, after all state that may
+ * affect regions and mapping boundaries has been cpr load'ed. Later,
+ * the listener will invoke its callback on each flat section and call
+ * dma_map to supply the new vaddr, and the calls will match the mappings
+ * remembered by the kernel.
+ */
+ if (!cpr_is_incoming()) {
+ if (!vfio_listener_register(bcontainer, errp)) {
+ goto fail;
+ }
+ }
+
+ bcontainer->initialized = true;
+
+ return true;
+
+fail:
+ if (new_container) {
+ vfio_listener_unregister(bcontainer);
+ }
+
+ if (group_was_added) {
+ vfio_container_group_del(container, group);
+ }
+ if (vioc && vioc->release) {
+ vioc->release(bcontainer);
+ }
+ if (new_container) {
+ vfio_legacy_cpr_unregister_container(container);
+ object_unref(container);
+ }
+ if (fd >= 0) {
+ close(fd);
+ }
+ vfio_address_space_put(space);
+
+ return false;
+}
+
+static void vfio_container_disconnect(VFIOGroup *group)
+{
+ VFIOLegacyContainer *container = group->container;
+ VFIOContainer *bcontainer = VFIO_IOMMU(container);
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+
+ QLIST_REMOVE(group, container_next);
+ group->container = NULL;
+ cpr_delete_fd("vfio_container_for_group", group->groupid);
+
+ /*
+ * Explicitly release the listener first before unset container,
+ * since unset may destroy the backend container if it's the last
+ * group.
+ */
+ if (QLIST_EMPTY(&container->group_list)) {
+ vfio_listener_unregister(bcontainer);
+ if (vioc->release) {
+ vioc->release(bcontainer);
+ }
+ }
+
+ if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) {
+ error_report("vfio: error disconnecting group %d from container",
+ group->groupid);
+ }
+
+ if (QLIST_EMPTY(&container->group_list)) {
+ VFIOAddressSpace *space = bcontainer->space;
+
+ trace_vfio_container_disconnect(container->fd);
+ vfio_legacy_cpr_unregister_container(container);
+ close(container->fd);
+ object_unref(container);
+
+ vfio_address_space_put(space);
+ }
+}
+
+static VFIOGroup *vfio_group_get(int groupid, AddressSpace *as, Error **errp)
+{
+ ERRP_GUARD();
+ VFIOGroup *group;
+ char path[32];
+ struct vfio_group_status status = { .argsz = sizeof(status) };
+
+ QLIST_FOREACH(group, &vfio_group_list, next) {
+ if (group->groupid == groupid) {
+ /* Found it. Now is it already in the right context? */
+ if (VFIO_IOMMU(group->container)->space->as == as) {
+ return group;
+ } else {
+ error_setg(errp, "group %d used in multiple address spaces",
+ group->groupid);
+ return NULL;
+ }
+ }
+ }
+
+ group = g_malloc0(sizeof(*group));
+
+ snprintf(path, sizeof(path), "/dev/vfio/%d", groupid);
+ group->fd = cpr_open_fd(path, O_RDWR, "vfio_group", groupid, errp);
+ if (group->fd < 0) {
+ goto free_group_exit;
+ }
+
+ if (ioctl(group->fd, VFIO_GROUP_GET_STATUS, &status)) {
+ error_setg_errno(errp, errno, "failed to get group %d status", groupid);
+ goto close_fd_exit;
+ }
+
+ if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
+ error_setg(errp, "group %d is not viable", groupid);
+ error_append_hint(errp,
+ "Please ensure all devices within the iommu_group "
+ "are bound to their vfio bus driver.\n");
+ goto close_fd_exit;
+ }
+
+ group->groupid = groupid;
+ QLIST_INIT(&group->device_list);
+
+ if (!vfio_container_connect(group, as, errp)) {
+ error_prepend(errp, "failed to setup container for group %d: ",
+ groupid);
+ goto close_fd_exit;
+ }
+
+ QLIST_INSERT_HEAD(&vfio_group_list, group, next);
+
+ return group;
+
+close_fd_exit:
+ cpr_delete_fd("vfio_group", groupid);
+ close(group->fd);
+
+free_group_exit:
+ g_free(group);
+
+ return NULL;
+}
+
+static void vfio_group_put(VFIOGroup *group)
+{
+ if (!group || !QLIST_EMPTY(&group->device_list)) {
+ return;
+ }
+
+ if (!group->ram_block_discard_allowed) {
+ vfio_ram_block_discard_disable(group->container, false);
+ }
+ vfio_group_del_kvm_device(group);
+ vfio_container_disconnect(group);
+ QLIST_REMOVE(group, next);
+ trace_vfio_group_put(group->fd);
+ cpr_delete_fd("vfio_group", group->groupid);
+ close(group->fd);
+ g_free(group);
+}
+
+static bool vfio_device_get(VFIOGroup *group, const char *name,
+ VFIODevice *vbasedev, Error **errp)
+{
+ g_autofree struct vfio_device_info *info = NULL;
+ int fd;
+
+ fd = vfio_cpr_group_get_device_fd(group->fd, name);
+ if (fd < 0) {
+ error_setg_errno(errp, errno, "error getting device from group %d",
+ group->groupid);
+ error_append_hint(errp,
+ "Verify all devices in group %d are bound to vfio-<bus> "
+ "or pci-stub and not already in use\n", group->groupid);
+ return false;
+ }
+
+ info = vfio_get_device_info(fd);
+ if (!info) {
+ error_setg_errno(errp, errno, "error getting device info");
+ goto fail;
+ }
+
+ /*
+ * Set discarding of RAM as not broken for this group if the driver knows
+ * the device operates compatibly with discarding. Setting must be
+ * consistent per group, but since compatibility is really only possible
+ * with mdev currently, we expect singleton groups.
+ */
+ if (vbasedev->ram_block_discard_allowed !=
+ group->ram_block_discard_allowed) {
+ if (!QLIST_EMPTY(&group->device_list)) {
+ error_setg(errp, "Inconsistent setting of support for discarding "
+ "RAM (e.g., balloon) within group");
+ goto fail;
+ }
+
+ if (!group->ram_block_discard_allowed) {
+ group->ram_block_discard_allowed = true;
+ vfio_ram_block_discard_disable(group->container, false);
+ }
+ }
+
+ vfio_device_prepare(vbasedev, VFIO_IOMMU(group->container), info);
+
+ vbasedev->fd = fd;
+ vbasedev->group = group;
+ QLIST_INSERT_HEAD(&group->device_list, vbasedev, next);
+
+ trace_vfio_device_get(name, info->flags, info->num_regions, info->num_irqs);
+
+ return true;
+
+fail:
+ close(fd);
+ cpr_delete_fd(name, 0);
+ return false;
+}
+
+static void vfio_device_put(VFIODevice *vbasedev)
+{
+ if (!vbasedev->group) {
+ return;
+ }
+ QLIST_REMOVE(vbasedev, next);
+ vbasedev->group = NULL;
+ trace_vfio_device_put(vbasedev->fd);
+ cpr_delete_fd(vbasedev->name, 0);
+ close(vbasedev->fd);
+}
+
+static int vfio_device_get_groupid(VFIODevice *vbasedev, Error **errp)
+{
+ char *tmp, group_path[PATH_MAX];
+ g_autofree char *group_name = NULL;
+ int ret, groupid;
+ ssize_t len;
+
+ tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
+ len = readlink(tmp, group_path, sizeof(group_path));
+ g_free(tmp);
+
+ if (len <= 0 || len >= sizeof(group_path)) {
+ ret = len < 0 ? -errno : -ENAMETOOLONG;
+ error_setg_errno(errp, -ret, "no iommu_group found");
+ return ret;
+ }
+
+ group_path[len] = 0;
+
+ group_name = g_path_get_basename(group_path);
+ if (sscanf(group_name, "%d", &groupid) != 1) {
+ error_setg_errno(errp, errno, "failed to read %s", group_path);
+ return -errno;
+ }
+ return groupid;
+}
+
+/*
+ * vfio_legacy_attach_device: attach a device to a security context.
+ * @name and @vbasedev->name are likely to be different depending
+ * on the type of the device, hence the need for passing @name.
+ */
+static bool vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev,
+ AddressSpace *as, Error **errp)
+{
+ int groupid = vfio_device_get_groupid(vbasedev, errp);
+ VFIODevice *vbasedev_iter;
+ VFIOGroup *group;
+
+ if (groupid < 0) {
+ return false;
+ }
+
+ trace_vfio_device_attach(vbasedev->name, groupid);
+
+ group = vfio_group_get(groupid, as, errp);
+ if (!group) {
+ return false;
+ }
+
+ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
+ if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
+ error_setg(errp, "device is already attached");
+ goto group_put_exit;
+ }
+ }
+ if (!vfio_device_get(group, name, vbasedev, errp)) {
+ goto group_put_exit;
+ }
+
+ if (!vfio_device_hiod_create_and_realize(vbasedev,
+ TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO,
+ errp)) {
+ goto device_put_exit;
+ }
+
+ if (vbasedev->mdev) {
+ error_setg(&vbasedev->cpr.mdev_blocker,
+ "CPR does not support vfio mdev %s", vbasedev->name);
+ if (migrate_add_blocker_modes(&vbasedev->cpr.mdev_blocker, errp,
+ MIG_MODE_CPR_TRANSFER, -1) < 0) {
+ goto hiod_unref_exit;
+ }
+ }
+
+ return true;
+
+hiod_unref_exit:
+ object_unref(vbasedev->hiod);
+device_put_exit:
+ vfio_device_put(vbasedev);
+group_put_exit:
+ vfio_group_put(group);
+ return false;
+}
+
+static void vfio_legacy_detach_device(VFIODevice *vbasedev)
+{
+ VFIOGroup *group = vbasedev->group;
+
+ trace_vfio_device_detach(vbasedev->name, group->groupid);
+
+ vfio_device_unprepare(vbasedev);
+
+ migrate_del_blocker(&vbasedev->cpr.mdev_blocker);
+ object_unref(vbasedev->hiod);
+ vfio_device_put(vbasedev);
+ vfio_group_put(group);
+}
+
+static int vfio_legacy_pci_hot_reset(VFIODevice *vbasedev, bool single)
+{
+ VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
+ VFIOGroup *group;
+ struct vfio_pci_hot_reset_info *info = NULL;
+ struct vfio_pci_dependent_device *devices;
+ struct vfio_pci_hot_reset *reset;
+ int32_t *fds;
+ int ret, i, count;
+ bool multi = false;
+
+ trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");
+
+ if (!single) {
+ vfio_pci_pre_reset(vdev);
+ }
+ vdev->vbasedev.needs_reset = false;
+
+ ret = vfio_pci_get_pci_hot_reset_info(vdev, &info);
+
+ if (ret) {
+ goto out_single;
+ }
+ devices = &info->devices[0];
+
+ trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name);
+
+ /* Verify that we have all the groups required */
+ for (i = 0; i < info->count; i++) {
+ PCIHostDeviceAddress host;
+ VFIOPCIDevice *tmp;
+ VFIODevice *vbasedev_iter;
+
+ host.domain = devices[i].segment;
+ host.bus = devices[i].bus;
+ host.slot = PCI_SLOT(devices[i].devfn);
+ host.function = PCI_FUNC(devices[i].devfn);
+
+ trace_vfio_pci_hot_reset_dep_devices(host.domain,
+ host.bus, host.slot, host.function, devices[i].group_id);
+
+ if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
+ continue;
+ }
+
+ QLIST_FOREACH(group, &vfio_group_list, next) {
+ if (group->groupid == devices[i].group_id) {
+ break;
+ }
+ }
+
+ if (!group) {
+ if (!vdev->has_pm_reset) {
+ error_report("vfio: Cannot reset device %s, "
+ "depends on group %d which is not owned.",
+ vdev->vbasedev.name, devices[i].group_id);
+ }
+ ret = -EPERM;
+ goto out;
+ }
+
+ /* Prep dependent devices for reset and clear our marker. */
+ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
+ if (!vbasedev_iter->dev->realized ||
+ !vfio_pci_from_vfio_device(vbasedev_iter)) {
+ continue;
+ }
+ tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
+ if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
+ if (single) {
+ ret = -EINVAL;
+ goto out_single;
+ }
+ vfio_pci_pre_reset(tmp);
+ tmp->vbasedev.needs_reset = false;
+ multi = true;
+ break;
+ }
+ }
+ }
+
+ if (!single && !multi) {
+ ret = -EINVAL;
+ goto out_single;
+ }
+
+ /* Determine how many group fds need to be passed */
+ count = 0;
+ QLIST_FOREACH(group, &vfio_group_list, next) {
+ for (i = 0; i < info->count; i++) {
+ if (group->groupid == devices[i].group_id) {
+ count++;
+ break;
+ }
+ }
+ }
+
+ reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds)));
+ reset->argsz = sizeof(*reset) + (count * sizeof(*fds));
+ fds = &reset->group_fds[0];
+
+ /* Fill in group fds */
+ QLIST_FOREACH(group, &vfio_group_list, next) {
+ for (i = 0; i < info->count; i++) {
+ if (group->groupid == devices[i].group_id) {
+ fds[reset->count++] = group->fd;
+ break;
+ }
+ }
+ }
+
+ /* Bus reset! */
+ ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
+ g_free(reset);
+ if (ret) {
+ ret = -errno;
+ }
+
+ trace_vfio_pci_hot_reset_result(vdev->vbasedev.name,
+ ret ? strerror(errno) : "Success");
+
+out:
+ /* Re-enable INTx on affected devices */
+ for (i = 0; i < info->count; i++) {
+ PCIHostDeviceAddress host;
+ VFIOPCIDevice *tmp;
+ VFIODevice *vbasedev_iter;
+
+ host.domain = devices[i].segment;
+ host.bus = devices[i].bus;
+ host.slot = PCI_SLOT(devices[i].devfn);
+ host.function = PCI_FUNC(devices[i].devfn);
+
+ if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
+ continue;
+ }
+
+ QLIST_FOREACH(group, &vfio_group_list, next) {
+ if (group->groupid == devices[i].group_id) {
+ break;
+ }
+ }
+
+ if (!group) {
+ break;
+ }
+
+ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
+ if (!vbasedev_iter->dev->realized ||
+ !vfio_pci_from_vfio_device(vbasedev_iter)) {
+ continue;
+ }
+ tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
+ if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
+ vfio_pci_post_reset(tmp);
+ break;
+ }
+ }
+ }
+out_single:
+ if (!single) {
+ vfio_pci_post_reset(vdev);
+ }
+ g_free(info);
+
+ return ret;
+}
+
+static void vfio_iommu_legacy_class_init(ObjectClass *klass, const void *data)
+{
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);
+
+ vioc->setup = vfio_legacy_setup;
+ vioc->dma_map = vfio_legacy_dma_map;
+ vioc->dma_unmap = vfio_legacy_dma_unmap;
+ vioc->attach_device = vfio_legacy_attach_device;
+ vioc->detach_device = vfio_legacy_detach_device;
+ vioc->set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking;
+ vioc->query_dirty_bitmap = vfio_legacy_query_dirty_bitmap;
+ vioc->pci_hot_reset = vfio_legacy_pci_hot_reset;
+};
+
+static bool hiod_legacy_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
+ Error **errp)
+{
+ VFIODevice *vdev = opaque;
+
+ hiod->name = g_strdup(vdev->name);
+ hiod->agent = opaque;
+
+ return true;
+}
+
+static int hiod_legacy_vfio_get_cap(HostIOMMUDevice *hiod, int cap,
+ Error **errp)
+{
+ switch (cap) {
+ case HOST_IOMMU_DEVICE_CAP_AW_BITS:
+ return vfio_device_get_aw_bits(hiod->agent);
+ default:
+ error_setg(errp, "%s: unsupported capability %x", hiod->name, cap);
+ return -EINVAL;
+ }
+}
+
+static GList *
+hiod_legacy_vfio_get_iova_ranges(HostIOMMUDevice *hiod)
+{
+ VFIODevice *vdev = hiod->agent;
+
+ g_assert(vdev);
+ return vfio_container_get_iova_ranges(vdev->bcontainer);
+}
+
+static uint64_t
+hiod_legacy_vfio_get_page_size_mask(HostIOMMUDevice *hiod)
+{
+ VFIODevice *vdev = hiod->agent;
+
+ g_assert(vdev);
+ return vfio_container_get_page_size_mask(vdev->bcontainer);
+}
+
+static void vfio_iommu_legacy_instance_init(Object *obj)
+{
+ VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(obj);
+
+ QLIST_INIT(&container->group_list);
+}
+
+static void hiod_legacy_vfio_class_init(ObjectClass *oc, const void *data)
+{
+ HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc);
+
+ hioc->realize = hiod_legacy_vfio_realize;
+ hioc->get_cap = hiod_legacy_vfio_get_cap;
+ hioc->get_iova_ranges = hiod_legacy_vfio_get_iova_ranges;
+ hioc->get_page_size_mask = hiod_legacy_vfio_get_page_size_mask;
+};
+
+static const TypeInfo types[] = {
+ {
+ .name = TYPE_VFIO_IOMMU_LEGACY,
+ .parent = TYPE_VFIO_IOMMU,
+ .instance_init = vfio_iommu_legacy_instance_init,
+ .instance_size = sizeof(VFIOLegacyContainer),
+ .class_init = vfio_iommu_legacy_class_init,
+ }, {
+ .name = TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO,
+ .parent = TYPE_HOST_IOMMU_DEVICE,
+ .class_init = hiod_legacy_vfio_class_init,
+ }
+};
+
+DEFINE_TYPES(types)
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 030c6d3..250b20f 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -1,1275 +1,350 @@
/*
- * generic functions used by VFIO devices
+ * VFIO BASE CONTAINER
*
- * Copyright Red Hat, Inc. 2012
+ * Copyright (C) 2023 Intel Corporation.
+ * Copyright Red Hat, Inc. 2023
*
- * Authors:
- * Alex Williamson <alex.williamson@redhat.com>
+ * Authors: Yi Liu <yi.l.liu@intel.com>
+ * Eric Auger <eric.auger@redhat.com>
*
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
- * Based on qemu-kvm device-assignment:
- * Adapted for KVM by Qumranet.
- * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com)
- * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com)
- * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com)
- * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com)
- * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
+ * SPDX-License-Identifier: GPL-2.0-or-later
*/
-#include "qemu/osdep.h"
#include <sys/ioctl.h>
#include <linux/vfio.h>
-#include "hw/vfio/vfio-device.h"
-#include "system/address-spaces.h"
-#include "system/memory.h"
+#include "qemu/osdep.h"
+#include "system/tcg.h"
#include "system/ram_addr.h"
-#include "qemu/error-report.h"
-#include "qemu/range.h"
-#include "system/reset.h"
-#include "trace.h"
#include "qapi/error.h"
-#include "migration/cpr.h"
-#include "migration/blocker.h"
-#include "pci.h"
+#include "qemu/error-report.h"
#include "hw/vfio/vfio-container.h"
+#include "hw/vfio/vfio-device.h" /* vfio_device_reset_handler */
+#include "system/reset.h"
#include "vfio-helpers.h"
-#include "vfio-listener.h"
-
-#define TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO TYPE_HOST_IOMMU_DEVICE "-legacy-vfio"
-
-typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList;
-static VFIOGroupList vfio_group_list =
- QLIST_HEAD_INITIALIZER(vfio_group_list);
-
-static int vfio_ram_block_discard_disable(VFIOContainer *container, bool state)
-{
- switch (container->iommu_type) {
- case VFIO_TYPE1v2_IOMMU:
- case VFIO_TYPE1_IOMMU:
- /*
- * We support coordinated discarding of RAM via the RamDiscardManager.
- */
- return ram_block_uncoordinated_discard_disable(state);
- default:
- /*
- * VFIO_SPAPR_TCE_IOMMU most probably works just fine with
- * RamDiscardManager, however, it is completely untested.
- *
- * VFIO_SPAPR_TCE_v2_IOMMU with "DMA memory preregistering" does
- * completely the opposite of managing mapping/pinning dynamically as
- * required by RamDiscardManager. We would have to special-case sections
- * with a RamDiscardManager.
- */
- return ram_block_discard_disable(state);
- }
-}
-
-static int vfio_dma_unmap_bitmap(const VFIOContainer *container,
- hwaddr iova, ram_addr_t size,
- IOMMUTLBEntry *iotlb)
-{
- const VFIOContainerBase *bcontainer = VFIO_IOMMU(container);
- struct vfio_iommu_type1_dma_unmap *unmap;
- struct vfio_bitmap *bitmap;
- VFIOBitmap vbmap;
- int ret;
-
- ret = vfio_bitmap_alloc(&vbmap, size);
- if (ret) {
- return ret;
- }
-
- unmap = g_malloc0(sizeof(*unmap) + sizeof(*bitmap));
-
- unmap->argsz = sizeof(*unmap) + sizeof(*bitmap);
- unmap->iova = iova;
- unmap->size = size;
- unmap->flags |= VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP;
- bitmap = (struct vfio_bitmap *)&unmap->data;
-
- /*
- * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of
- * qemu_real_host_page_size to mark those dirty. Hence set bitmap_pgsize
- * to qemu_real_host_page_size.
- */
- bitmap->pgsize = qemu_real_host_page_size();
- bitmap->size = vbmap.size;
- bitmap->data = (__u64 *)vbmap.bitmap;
-
- if (vbmap.size > bcontainer->max_dirty_bitmap_size) {
- error_report("UNMAP: Size of bitmap too big 0x%"PRIx64, vbmap.size);
- ret = -E2BIG;
- goto unmap_exit;
- }
-
- ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
- if (!ret) {
- cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap,
- iotlb->translated_addr, vbmap.pages);
- } else {
- error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %m");
- }
-
-unmap_exit:
- g_free(unmap);
- g_free(vbmap.bitmap);
-
- return ret;
-}
-
-static int vfio_legacy_dma_unmap_one(const VFIOContainerBase *bcontainer,
- hwaddr iova, ram_addr_t size,
- IOMMUTLBEntry *iotlb)
-{
- const VFIOContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
- struct vfio_iommu_type1_dma_unmap unmap = {
- .argsz = sizeof(unmap),
- .flags = 0,
- .iova = iova,
- .size = size,
- };
- bool need_dirty_sync = false;
- int ret;
- Error *local_err = NULL;
-
- g_assert(!cpr_is_incoming());
-
- if (iotlb && vfio_container_dirty_tracking_is_started(bcontainer)) {
- if (!vfio_container_devices_dirty_tracking_is_supported(bcontainer) &&
- bcontainer->dirty_pages_supported) {
- return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
- }
- need_dirty_sync = true;
- }
-
- while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
- /*
- * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c
- * v4.15) where an overflow in its wrap-around check prevents us from
- * unmapping the last page of the address space. Test for the error
- * condition and re-try the unmap excluding the last page. The
- * expectation is that we've never mapped the last page anyway and this
- * unmap request comes via vIOMMU support which also makes it unlikely
- * that this page is used. This bug was introduced well after type1 v2
- * support was introduced, so we shouldn't need to test for v1. A fix
- * is queued for kernel v5.0 so this workaround can be removed once
- * affected kernels are sufficiently deprecated.
- */
- if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) &&
- container->iommu_type == VFIO_TYPE1v2_IOMMU) {
- trace_vfio_legacy_dma_unmap_overflow_workaround();
- unmap.size -= 1ULL << ctz64(bcontainer->pgsizes);
- continue;
- }
- return -errno;
- }
-
- if (need_dirty_sync) {
- ret = vfio_container_query_dirty_bitmap(bcontainer, iova, size,
- iotlb->translated_addr, &local_err);
- if (ret) {
- error_report_err(local_err);
- return ret;
- }
- }
+#include "trace.h"
- return 0;
-}
+static QLIST_HEAD(, VFIOAddressSpace) vfio_address_spaces =
+ QLIST_HEAD_INITIALIZER(vfio_address_spaces);
-/*
- * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
- */
-static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
- hwaddr iova, ram_addr_t size,
- IOMMUTLBEntry *iotlb, bool unmap_all)
+VFIOAddressSpace *vfio_address_space_get(AddressSpace *as)
{
- int ret;
-
- if (unmap_all) {
- /* The unmap ioctl doesn't accept a full 64-bit span. */
- Int128 llsize = int128_rshift(int128_2_64(), 1);
-
- ret = vfio_legacy_dma_unmap_one(bcontainer, 0, int128_get64(llsize),
- iotlb);
+ VFIOAddressSpace *space;
- if (ret == 0) {
- ret = vfio_legacy_dma_unmap_one(bcontainer, int128_get64(llsize),
- int128_get64(llsize), iotlb);
+ QLIST_FOREACH(space, &vfio_address_spaces, list) {
+ if (space->as == as) {
+ return space;
}
-
- } else {
- ret = vfio_legacy_dma_unmap_one(bcontainer, iova, size, iotlb);
- }
-
- return ret;
-}
-
-static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
- ram_addr_t size, void *vaddr, bool readonly,
- MemoryRegion *mr)
-{
- const VFIOContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
- struct vfio_iommu_type1_dma_map map = {
- .argsz = sizeof(map),
- .flags = VFIO_DMA_MAP_FLAG_READ,
- .vaddr = (__u64)(uintptr_t)vaddr,
- .iova = iova,
- .size = size,
- };
-
- if (!readonly) {
- map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
- }
-
- /*
- * Try the mapping, if it fails with EBUSY, unmap the region and try
- * again. This shouldn't be necessary, but we sometimes see it in
- * the VGA ROM space.
- */
- if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 ||
- (errno == EBUSY &&
- vfio_legacy_dma_unmap(bcontainer, iova, size, NULL, false) == 0 &&
- ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
- return 0;
- }
-
- return -errno;
-}
-
-static int
-vfio_legacy_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
- bool start, Error **errp)
-{
- const VFIOContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
- int ret;
- struct vfio_iommu_type1_dirty_bitmap dirty = {
- .argsz = sizeof(dirty),
- };
-
- if (start) {
- dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START;
- } else {
- dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP;
- }
-
- ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty);
- if (ret) {
- ret = -errno;
- error_setg_errno(errp, errno, "Failed to set dirty tracking flag 0x%x",
- dirty.flags);
- }
-
- return ret;
-}
-
-static int vfio_legacy_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
- VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp)
-{
- const VFIOContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
- struct vfio_iommu_type1_dirty_bitmap *dbitmap;
- struct vfio_iommu_type1_dirty_bitmap_get *range;
- int ret;
-
- dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range));
-
- dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range);
- dbitmap->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP;
- range = (struct vfio_iommu_type1_dirty_bitmap_get *)&dbitmap->data;
- range->iova = iova;
- range->size = size;
-
- /*
- * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of
- * qemu_real_host_page_size to mark those dirty. Hence set bitmap's pgsize
- * to qemu_real_host_page_size.
- */
- range->bitmap.pgsize = qemu_real_host_page_size();
- range->bitmap.size = vbmap->size;
- range->bitmap.data = (__u64 *)vbmap->bitmap;
-
- ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap);
- if (ret) {
- ret = -errno;
- error_setg_errno(errp, errno,
- "Failed to get dirty bitmap for iova: 0x%"PRIx64
- " size: 0x%"PRIx64, (uint64_t)range->iova,
- (uint64_t)range->size);
- }
-
- g_free(dbitmap);
-
- return ret;
-}
-
-static bool vfio_get_info_iova_range(struct vfio_iommu_type1_info *info,
- VFIOContainerBase *bcontainer)
-{
- struct vfio_info_cap_header *hdr;
- struct vfio_iommu_type1_info_cap_iova_range *cap;
-
- hdr = vfio_get_iommu_type1_info_cap(info,
- VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
- if (!hdr) {
- return false;
}
- cap = (void *)hdr;
-
- for (int i = 0; i < cap->nr_iovas; i++) {
- Range *range = g_new(Range, 1);
+ /* No suitable VFIOAddressSpace, create a new one */
+ space = g_malloc0(sizeof(*space));
+ space->as = as;
+ QLIST_INIT(&space->containers);
- range_set_bounds(range, cap->iova_ranges[i].start,
- cap->iova_ranges[i].end);
- bcontainer->iova_ranges =
- range_list_insert(bcontainer->iova_ranges, range);
+ if (QLIST_EMPTY(&vfio_address_spaces)) {
+ qemu_register_reset(vfio_device_reset_handler, NULL);
}
- return true;
-}
-
-static void vfio_group_add_kvm_device(VFIOGroup *group)
-{
- Error *err = NULL;
+ QLIST_INSERT_HEAD(&vfio_address_spaces, space, list);
- if (vfio_kvm_device_add_fd(group->fd, &err)) {
- error_reportf_err(err, "group ID %d: ", group->groupid);
- }
+ return space;
}
-static void vfio_group_del_kvm_device(VFIOGroup *group)
+void vfio_address_space_put(VFIOAddressSpace *space)
{
- Error *err = NULL;
-
- if (vfio_kvm_device_del_fd(group->fd, &err)) {
- error_reportf_err(err, "group ID %d: ", group->groupid);
+ if (!QLIST_EMPTY(&space->containers)) {
+ return;
}
-}
-/*
- * vfio_get_iommu_type - selects the richest iommu_type (v2 first)
- */
-static int vfio_get_iommu_type(int container_fd,
- Error **errp)
-{
- int iommu_types[] = { VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU,
- VFIO_SPAPR_TCE_v2_IOMMU, VFIO_SPAPR_TCE_IOMMU };
- int i;
+ QLIST_REMOVE(space, list);
+ g_free(space);
- for (i = 0; i < ARRAY_SIZE(iommu_types); i++) {
- if (ioctl(container_fd, VFIO_CHECK_EXTENSION, iommu_types[i])) {
- return iommu_types[i];
- }
+ if (QLIST_EMPTY(&vfio_address_spaces)) {
+ qemu_unregister_reset(vfio_device_reset_handler, NULL);
}
- error_setg(errp, "No available IOMMU models");
- return -EINVAL;
}
-/*
- * vfio_get_iommu_ops - get a VFIOIOMMUClass associated with a type
- */
-static const char *vfio_get_iommu_class_name(int iommu_type)
+void vfio_address_space_insert(VFIOAddressSpace *space,
+ VFIOContainer *bcontainer)
{
- switch (iommu_type) {
- case VFIO_TYPE1v2_IOMMU:
- case VFIO_TYPE1_IOMMU:
- return TYPE_VFIO_IOMMU_LEGACY;
- break;
- case VFIO_SPAPR_TCE_v2_IOMMU:
- case VFIO_SPAPR_TCE_IOMMU:
- return TYPE_VFIO_IOMMU_SPAPR;
- break;
- default:
- g_assert_not_reached();
- };
+ QLIST_INSERT_HEAD(&space->containers, bcontainer, next);
+ bcontainer->space = space;
}
-static bool vfio_set_iommu(int container_fd, int group_fd,
- int *iommu_type, Error **errp)
+int vfio_container_dma_map(VFIOContainer *bcontainer,
+ hwaddr iova, ram_addr_t size,
+ void *vaddr, bool readonly, MemoryRegion *mr)
{
- if (ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &container_fd)) {
- error_setg_errno(errp, errno, "Failed to set group container");
- return false;
- }
-
- while (ioctl(container_fd, VFIO_SET_IOMMU, *iommu_type)) {
- if (*iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
- /*
- * On sPAPR, despite the IOMMU subdriver always advertises v1 and
- * v2, the running platform may not support v2 and there is no
- * way to guess it until an IOMMU group gets added to the container.
- * So in case it fails with v2, try v1 as a fallback.
- */
- *iommu_type = VFIO_SPAPR_TCE_IOMMU;
- continue;
- }
- error_setg_errno(errp, errno, "Failed to set iommu for container");
- return false;
- }
-
- return true;
-}
-
-static VFIOContainer *vfio_create_container(int fd, VFIOGroup *group,
- Error **errp)
-{
- int iommu_type;
- const char *vioc_name;
- VFIOContainer *container;
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+ RAMBlock *rb = mr->ram_block;
+ int mfd = rb ? qemu_ram_get_fd(rb) : -1;
- iommu_type = vfio_get_iommu_type(fd, errp);
- if (iommu_type < 0) {
- return NULL;
- }
+ if (mfd >= 0 && vioc->dma_map_file) {
+ unsigned long start = vaddr - qemu_ram_get_host_addr(rb);
+ unsigned long offset = qemu_ram_get_fd_offset(rb);
- /*
- * During CPR, just set the container type and skip the ioctls, as the
- * container and group are already configured in the kernel.
- */
- if (!cpr_is_incoming() &&
- !vfio_set_iommu(fd, group->fd, &iommu_type, errp)) {
- return NULL;
+ return vioc->dma_map_file(bcontainer, iova, size, mfd, start + offset,
+ readonly);
}
-
- vioc_name = vfio_get_iommu_class_name(iommu_type);
-
- container = VFIO_IOMMU_LEGACY(object_new(vioc_name));
- container->fd = fd;
- container->iommu_type = iommu_type;
- return container;
+ g_assert(vioc->dma_map);
+ return vioc->dma_map(bcontainer, iova, size, vaddr, readonly, mr);
}
-static int vfio_get_iommu_info(VFIOContainer *container,
- struct vfio_iommu_type1_info **info)
+int vfio_container_dma_unmap(VFIOContainer *bcontainer,
+ hwaddr iova, ram_addr_t size,
+ IOMMUTLBEntry *iotlb, bool unmap_all)
{
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
- size_t argsz = sizeof(struct vfio_iommu_type1_info);
-
- *info = g_new0(struct vfio_iommu_type1_info, 1);
-again:
- (*info)->argsz = argsz;
-
- if (ioctl(container->fd, VFIO_IOMMU_GET_INFO, *info)) {
- g_free(*info);
- *info = NULL;
- return -errno;
- }
-
- if (((*info)->argsz > argsz)) {
- argsz = (*info)->argsz;
- *info = g_realloc(*info, argsz);
- goto again;
- }
-
- return 0;
+ g_assert(vioc->dma_unmap);
+ return vioc->dma_unmap(bcontainer, iova, size, iotlb, unmap_all);
}
-static struct vfio_info_cap_header *
-vfio_get_iommu_info_cap(struct vfio_iommu_type1_info *info, uint16_t id)
+bool vfio_container_add_section_window(VFIOContainer *bcontainer,
+ MemoryRegionSection *section,
+ Error **errp)
{
- struct vfio_info_cap_header *hdr;
- void *ptr = info;
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
- if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) {
- return NULL;
+ if (!vioc->add_window) {
+ return true;
}
- for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) {
- if (hdr->id == id) {
- return hdr;
- }
- }
-
- return NULL;
+ return vioc->add_window(bcontainer, section, errp);
}
-static void vfio_get_iommu_info_migration(VFIOContainer *container,
- struct vfio_iommu_type1_info *info)
+void vfio_container_del_section_window(VFIOContainer *bcontainer,
+ MemoryRegionSection *section)
{
- struct vfio_info_cap_header *hdr;
- struct vfio_iommu_type1_info_cap_migration *cap_mig;
- VFIOContainerBase *bcontainer = VFIO_IOMMU(container);
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
- hdr = vfio_get_iommu_info_cap(info, VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION);
- if (!hdr) {
+ if (!vioc->del_window) {
return;
}
- cap_mig = container_of(hdr, struct vfio_iommu_type1_info_cap_migration,
- header);
-
- /*
- * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of
- * qemu_real_host_page_size to mark those dirty.
- */
- if (cap_mig->pgsize_bitmap & qemu_real_host_page_size()) {
- bcontainer->dirty_pages_supported = true;
- bcontainer->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size;
- bcontainer->dirty_pgsizes = cap_mig->pgsize_bitmap;
- }
+ return vioc->del_window(bcontainer, section);
}
-static bool vfio_legacy_setup(VFIOContainerBase *bcontainer, Error **errp)
+int vfio_container_set_dirty_page_tracking(VFIOContainer *bcontainer,
+ bool start, Error **errp)
{
- VFIOContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
- g_autofree struct vfio_iommu_type1_info *info = NULL;
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
int ret;
- ret = vfio_get_iommu_info(container, &info);
- if (ret) {
- error_setg_errno(errp, -ret, "Failed to get VFIO IOMMU info");
- return false;
+ if (!bcontainer->dirty_pages_supported) {
+ return 0;
}
- if (info->flags & VFIO_IOMMU_INFO_PGSIZES) {
- bcontainer->pgsizes = info->iova_pgsizes;
- } else {
- bcontainer->pgsizes = qemu_real_host_page_size();
+ g_assert(vioc->set_dirty_page_tracking);
+ if (bcontainer->dirty_pages_started == start) {
+ return 0;
}
- if (!vfio_get_info_dma_avail(info, &bcontainer->dma_max_mappings)) {
- bcontainer->dma_max_mappings = 65535;
+ ret = vioc->set_dirty_page_tracking(bcontainer, start, errp);
+ if (!ret) {
+ bcontainer->dirty_pages_started = start;
}
- vfio_get_info_iova_range(info, bcontainer);
-
- vfio_get_iommu_info_migration(container, info);
- return true;
+ return ret;
}
-static bool vfio_container_attach_discard_disable(VFIOContainer *container,
- VFIOGroup *group, Error **errp)
+static bool vfio_container_devices_dirty_tracking_is_started(
+ const VFIOContainer *bcontainer)
{
- int ret;
+ VFIODevice *vbasedev;
- /*
- * VFIO is currently incompatible with discarding of RAM insofar as the
- * madvise to purge (zap) the page from QEMU's address space does not
- * interact with the memory API and therefore leaves stale virtual to
- * physical mappings in the IOMMU if the page was previously pinned. We
- * therefore set discarding broken for each group added to a container,
- * whether the container is used individually or shared. This provides
- * us with options to allow devices within a group to opt-in and allow
- * discarding, so long as it is done consistently for a group (for instance
- * if the device is an mdev device where it is known that the host vendor
- * driver will never pin pages outside of the working set of the guest
- * driver, which would thus not be discarding candidates).
- *
- * The first opportunity to induce pinning occurs here where we attempt to
- * attach the group to existing containers within the AddressSpace. If any
- * pages are already zapped from the virtual address space, such as from
- * previous discards, new pinning will cause valid mappings to be
- * re-established. Likewise, when the overall MemoryListener for a new
- * container is registered, a replay of mappings within the AddressSpace
- * will occur, re-establishing any previously zapped pages as well.
- *
- * Especially virtio-balloon is currently only prevented from discarding
- * new memory, it will not yet set ram_block_discard_set_required() and
- * therefore, neither stops us here or deals with the sudden memory
- * consumption of inflated memory.
- *
- * We do support discarding of memory coordinated via the RamDiscardManager
- * with some IOMMU types. vfio_ram_block_discard_disable() handles the
- * details once we know which type of IOMMU we are using.
- */
-
- ret = vfio_ram_block_discard_disable(container, true);
- if (ret) {
- error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken");
- if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) {
- error_report("vfio: error disconnecting group %d from"
- " container", group->groupid);
+ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
+ if (!vbasedev->dirty_tracking) {
+ return false;
}
}
- return !ret;
-}
-static bool vfio_container_group_add(VFIOContainer *container, VFIOGroup *group,
- Error **errp)
-{
- if (!vfio_container_attach_discard_disable(container, group, errp)) {
- return false;
- }
- group->container = container;
- QLIST_INSERT_HEAD(&container->group_list, group, container_next);
- vfio_group_add_kvm_device(group);
- /*
- * Remember the container fd for each group, so we can attach to the same
- * container after CPR.
- */
- cpr_resave_fd("vfio_container_for_group", group->groupid, container->fd);
return true;
}
-static void vfio_container_group_del(VFIOContainer *container, VFIOGroup *group)
+bool vfio_container_dirty_tracking_is_started(
+ const VFIOContainer *bcontainer)
{
- QLIST_REMOVE(group, container_next);
- group->container = NULL;
- vfio_group_del_kvm_device(group);
- vfio_ram_block_discard_disable(container, false);
- cpr_delete_fd("vfio_container_for_group", group->groupid);
+ return vfio_container_devices_dirty_tracking_is_started(bcontainer) ||
+ bcontainer->dirty_pages_started;
}
-static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as,
- Error **errp)
+bool vfio_container_devices_dirty_tracking_is_supported(
+ const VFIOContainer *bcontainer)
{
- VFIOContainer *container;
- VFIOContainerBase *bcontainer;
- int ret, fd = -1;
- VFIOAddressSpace *space;
- VFIOIOMMUClass *vioc = NULL;
- bool new_container = false;
- bool group_was_added = false;
-
- space = vfio_address_space_get(as);
- fd = cpr_find_fd("vfio_container_for_group", group->groupid);
-
- if (!cpr_is_incoming()) {
- QLIST_FOREACH(bcontainer, &space->containers, next) {
- container = VFIO_IOMMU_LEGACY(bcontainer);
- if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
- return vfio_container_group_add(container, group, errp);
- }
- }
+ VFIODevice *vbasedev;
- fd = qemu_open("/dev/vfio/vfio", O_RDWR, errp);
- if (fd < 0) {
- goto fail;
+ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
+ if (vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) {
+ return false;
}
- } else {
- /*
- * For incoming CPR, the group is already attached in the kernel.
- * If a container with matching fd is found, then update the
- * userland group list and return. If not, then after the loop,
- * create the container struct and group list.
- */
- QLIST_FOREACH(bcontainer, &space->containers, next) {
- container = VFIO_IOMMU_LEGACY(bcontainer);
-
- if (vfio_cpr_container_match(container, group, fd)) {
- return vfio_container_group_add(container, group, errp);
- }
- }
- }
-
- ret = ioctl(fd, VFIO_GET_API_VERSION);
- if (ret != VFIO_API_VERSION) {
- error_setg(errp, "supported vfio version: %d, "
- "reported version: %d", VFIO_API_VERSION, ret);
- goto fail;
- }
-
- container = vfio_create_container(fd, group, errp);
- if (!container) {
- goto fail;
- }
- new_container = true;
- bcontainer = VFIO_IOMMU(container);
-
- if (!vfio_legacy_cpr_register_container(container, errp)) {
- goto fail;
- }
-
- vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
- assert(vioc->setup);
-
- if (!vioc->setup(bcontainer, errp)) {
- goto fail;
- }
-
- vfio_address_space_insert(space, bcontainer);
-
- if (!vfio_container_group_add(container, group, errp)) {
- goto fail;
- }
- group_was_added = true;
-
- /*
- * If CPR, register the listener later, after all state that may
- * affect regions and mapping boundaries has been cpr load'ed. Later,
- * the listener will invoke its callback on each flat section and call
- * dma_map to supply the new vaddr, and the calls will match the mappings
- * remembered by the kernel.
- */
- if (!cpr_is_incoming()) {
- if (!vfio_listener_register(bcontainer, errp)) {
- goto fail;
+ if (!vbasedev->dirty_pages_supported) {
+ return false;
}
}
- bcontainer->initialized = true;
-
return true;
-
-fail:
- if (new_container) {
- vfio_listener_unregister(bcontainer);
- }
-
- if (group_was_added) {
- vfio_container_group_del(container, group);
- }
- if (vioc && vioc->release) {
- vioc->release(bcontainer);
- }
- if (new_container) {
- vfio_legacy_cpr_unregister_container(container);
- object_unref(container);
- }
- if (fd >= 0) {
- close(fd);
- }
- vfio_address_space_put(space);
-
- return false;
-}
-
-static void vfio_container_disconnect(VFIOGroup *group)
-{
- VFIOContainer *container = group->container;
- VFIOContainerBase *bcontainer = VFIO_IOMMU(container);
- VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
-
- QLIST_REMOVE(group, container_next);
- group->container = NULL;
- cpr_delete_fd("vfio_container_for_group", group->groupid);
-
- /*
- * Explicitly release the listener first before unset container,
- * since unset may destroy the backend container if it's the last
- * group.
- */
- if (QLIST_EMPTY(&container->group_list)) {
- vfio_listener_unregister(bcontainer);
- if (vioc->release) {
- vioc->release(bcontainer);
- }
- }
-
- if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) {
- error_report("vfio: error disconnecting group %d from container",
- group->groupid);
- }
-
- if (QLIST_EMPTY(&container->group_list)) {
- VFIOAddressSpace *space = bcontainer->space;
-
- trace_vfio_container_disconnect(container->fd);
- vfio_legacy_cpr_unregister_container(container);
- close(container->fd);
- object_unref(container);
-
- vfio_address_space_put(space);
- }
}
-static VFIOGroup *vfio_group_get(int groupid, AddressSpace *as, Error **errp)
+static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova,
+ hwaddr size, void *bitmap)
{
- ERRP_GUARD();
- VFIOGroup *group;
- char path[32];
- struct vfio_group_status status = { .argsz = sizeof(status) };
-
- QLIST_FOREACH(group, &vfio_group_list, next) {
- if (group->groupid == groupid) {
- /* Found it. Now is it already in the right context? */
- if (VFIO_IOMMU(group->container)->space->as == as) {
- return group;
- } else {
- error_setg(errp, "group %d used in multiple address spaces",
- group->groupid);
- return NULL;
- }
- }
- }
+ uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
+ sizeof(struct vfio_device_feature_dma_logging_report),
+ sizeof(uint64_t))] = {};
+ struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
+ struct vfio_device_feature_dma_logging_report *report =
+ (struct vfio_device_feature_dma_logging_report *)feature->data;
- group = g_malloc0(sizeof(*group));
+ report->iova = iova;
+ report->length = size;
+ report->page_size = qemu_real_host_page_size();
+ report->bitmap = (uintptr_t)bitmap;
- snprintf(path, sizeof(path), "/dev/vfio/%d", groupid);
- group->fd = cpr_open_fd(path, O_RDWR, "vfio_group", groupid, errp);
- if (group->fd < 0) {
- goto free_group_exit;
- }
+ feature->argsz = sizeof(buf);
+ feature->flags = VFIO_DEVICE_FEATURE_GET |
+ VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT;
- if (ioctl(group->fd, VFIO_GROUP_GET_STATUS, &status)) {
- error_setg_errno(errp, errno, "failed to get group %d status", groupid);
- goto close_fd_exit;
- }
-
- if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
- error_setg(errp, "group %d is not viable", groupid);
- error_append_hint(errp,
- "Please ensure all devices within the iommu_group "
- "are bound to their vfio bus driver.\n");
- goto close_fd_exit;
- }
-
- group->groupid = groupid;
- QLIST_INIT(&group->device_list);
-
- if (!vfio_container_connect(group, as, errp)) {
- error_prepend(errp, "failed to setup container for group %d: ",
- groupid);
- goto close_fd_exit;
- }
-
- QLIST_INSERT_HEAD(&vfio_group_list, group, next);
-
- return group;
-
-close_fd_exit:
- cpr_delete_fd("vfio_group", groupid);
- close(group->fd);
-
-free_group_exit:
- g_free(group);
-
- return NULL;
-}
-
-static void vfio_group_put(VFIOGroup *group)
-{
- if (!group || !QLIST_EMPTY(&group->device_list)) {
- return;
- }
-
- if (!group->ram_block_discard_allowed) {
- vfio_ram_block_discard_disable(group->container, false);
- }
- vfio_group_del_kvm_device(group);
- vfio_container_disconnect(group);
- QLIST_REMOVE(group, next);
- trace_vfio_group_put(group->fd);
- cpr_delete_fd("vfio_group", group->groupid);
- close(group->fd);
- g_free(group);
-}
-
-static bool vfio_device_get(VFIOGroup *group, const char *name,
- VFIODevice *vbasedev, Error **errp)
-{
- g_autofree struct vfio_device_info *info = NULL;
- int fd;
-
- fd = vfio_cpr_group_get_device_fd(group->fd, name);
- if (fd < 0) {
- error_setg_errno(errp, errno, "error getting device from group %d",
- group->groupid);
- error_append_hint(errp,
- "Verify all devices in group %d are bound to vfio-<bus> "
- "or pci-stub and not already in use\n", group->groupid);
- return false;
- }
-
- info = vfio_get_device_info(fd);
- if (!info) {
- error_setg_errno(errp, errno, "error getting device info");
- goto fail;
- }
-
- /*
- * Set discarding of RAM as not broken for this group if the driver knows
- * the device operates compatibly with discarding. Setting must be
- * consistent per group, but since compatibility is really only possible
- * with mdev currently, we expect singleton groups.
- */
- if (vbasedev->ram_block_discard_allowed !=
- group->ram_block_discard_allowed) {
- if (!QLIST_EMPTY(&group->device_list)) {
- error_setg(errp, "Inconsistent setting of support for discarding "
- "RAM (e.g., balloon) within group");
- goto fail;
- }
-
- if (!group->ram_block_discard_allowed) {
- group->ram_block_discard_allowed = true;
- vfio_ram_block_discard_disable(group->container, false);
- }
- }
-
- vfio_device_prepare(vbasedev, VFIO_IOMMU(group->container), info);
-
- vbasedev->fd = fd;
- vbasedev->group = group;
- QLIST_INSERT_HEAD(&group->device_list, vbasedev, next);
-
- trace_vfio_device_get(name, info->flags, info->num_regions, info->num_irqs);
-
- return true;
-
-fail:
- close(fd);
- cpr_delete_fd(name, 0);
- return false;
-}
-
-static void vfio_device_put(VFIODevice *vbasedev)
-{
- if (!vbasedev->group) {
- return;
- }
- QLIST_REMOVE(vbasedev, next);
- vbasedev->group = NULL;
- trace_vfio_device_put(vbasedev->fd);
- cpr_delete_fd(vbasedev->name, 0);
- close(vbasedev->fd);
+ return vbasedev->io_ops->device_feature(vbasedev, feature);
}
-static int vfio_device_get_groupid(VFIODevice *vbasedev, Error **errp)
+static int vfio_container_iommu_query_dirty_bitmap(
+ const VFIOContainer *bcontainer, VFIOBitmap *vbmap, hwaddr iova,
+ hwaddr size, Error **errp)
{
- char *tmp, group_path[PATH_MAX];
- g_autofree char *group_name = NULL;
- int ret, groupid;
- ssize_t len;
-
- tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
- len = readlink(tmp, group_path, sizeof(group_path));
- g_free(tmp);
-
- if (len <= 0 || len >= sizeof(group_path)) {
- ret = len < 0 ? -errno : -ENAMETOOLONG;
- error_setg_errno(errp, -ret, "no iommu_group found");
- return ret;
- }
-
- group_path[len] = 0;
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
- group_name = g_path_get_basename(group_path);
- if (sscanf(group_name, "%d", &groupid) != 1) {
- error_setg_errno(errp, errno, "failed to read %s", group_path);
- return -errno;
- }
- return groupid;
+ g_assert(vioc->query_dirty_bitmap);
+ return vioc->query_dirty_bitmap(bcontainer, vbmap, iova, size,
+ errp);
}
-/*
- * vfio_device_attach: attach a device to a security context
- * @name and @vbasedev->name are likely to be different depending
- * on the type of the device, hence the need for passing @name
- */
-static bool vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev,
- AddressSpace *as, Error **errp)
+static int vfio_container_devices_query_dirty_bitmap(
+ const VFIOContainer *bcontainer, VFIOBitmap *vbmap, hwaddr iova,
+ hwaddr size, Error **errp)
{
- int groupid = vfio_device_get_groupid(vbasedev, errp);
- VFIODevice *vbasedev_iter;
- VFIOGroup *group;
-
- if (groupid < 0) {
- return false;
- }
-
- trace_vfio_device_attach(vbasedev->name, groupid);
-
- group = vfio_group_get(groupid, as, errp);
- if (!group) {
- return false;
- }
-
- QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
- if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
- error_setg(errp, "device is already attached");
- goto group_put_exit;
- }
- }
- if (!vfio_device_get(group, name, vbasedev, errp)) {
- goto group_put_exit;
- }
+ VFIODevice *vbasedev;
+ int ret;
- if (!vfio_device_hiod_create_and_realize(vbasedev,
- TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO,
- errp)) {
- goto device_put_exit;
- }
+ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
+ ret = vfio_device_dma_logging_report(vbasedev, iova, size,
+ vbmap->bitmap);
+ if (ret) {
+ error_setg_errno(errp, -ret,
+ "%s: Failed to get DMA logging report, iova: "
+ "0x%" HWADDR_PRIx ", size: 0x%" HWADDR_PRIx,
+ vbasedev->name, iova, size);
- if (vbasedev->mdev) {
- error_setg(&vbasedev->cpr.mdev_blocker,
- "CPR does not support vfio mdev %s", vbasedev->name);
- if (migrate_add_blocker_modes(&vbasedev->cpr.mdev_blocker, errp,
- MIG_MODE_CPR_TRANSFER, -1) < 0) {
- goto hiod_unref_exit;
+ return ret;
}
}
- return true;
-
-hiod_unref_exit:
- object_unref(vbasedev->hiod);
-device_put_exit:
- vfio_device_put(vbasedev);
-group_put_exit:
- vfio_group_put(group);
- return false;
+ return 0;
}
-static void vfio_legacy_detach_device(VFIODevice *vbasedev)
+int vfio_container_query_dirty_bitmap(const VFIOContainer *bcontainer,
+ uint64_t iova, uint64_t size,
+ ram_addr_t ram_addr, Error **errp)
{
- VFIOGroup *group = vbasedev->group;
-
- trace_vfio_device_detach(vbasedev->name, group->groupid);
-
- vfio_device_unprepare(vbasedev);
-
- migrate_del_blocker(&vbasedev->cpr.mdev_blocker);
- object_unref(vbasedev->hiod);
- vfio_device_put(vbasedev);
- vfio_group_put(group);
-}
+ bool all_device_dirty_tracking =
+ vfio_container_devices_dirty_tracking_is_supported(bcontainer);
+ uint64_t dirty_pages;
+ VFIOBitmap vbmap;
+ int ret;
-static int vfio_legacy_pci_hot_reset(VFIODevice *vbasedev, bool single)
-{
- VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
- VFIOGroup *group;
- struct vfio_pci_hot_reset_info *info = NULL;
- struct vfio_pci_dependent_device *devices;
- struct vfio_pci_hot_reset *reset;
- int32_t *fds;
- int ret, i, count;
- bool multi = false;
-
- trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");
-
- if (!single) {
- vfio_pci_pre_reset(vdev);
+ if (!bcontainer->dirty_pages_supported && !all_device_dirty_tracking) {
+ cpu_physical_memory_set_dirty_range(ram_addr, size,
+ tcg_enabled() ? DIRTY_CLIENTS_ALL :
+ DIRTY_CLIENTS_NOCODE);
+ return 0;
}
- vdev->vbasedev.needs_reset = false;
-
- ret = vfio_pci_get_pci_hot_reset_info(vdev, &info);
+ ret = vfio_bitmap_alloc(&vbmap, size);
if (ret) {
- goto out_single;
- }
- devices = &info->devices[0];
-
- trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name);
-
- /* Verify that we have all the groups required */
- for (i = 0; i < info->count; i++) {
- PCIHostDeviceAddress host;
- VFIOPCIDevice *tmp;
- VFIODevice *vbasedev_iter;
-
- host.domain = devices[i].segment;
- host.bus = devices[i].bus;
- host.slot = PCI_SLOT(devices[i].devfn);
- host.function = PCI_FUNC(devices[i].devfn);
-
- trace_vfio_pci_hot_reset_dep_devices(host.domain,
- host.bus, host.slot, host.function, devices[i].group_id);
-
- if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
- continue;
- }
-
- QLIST_FOREACH(group, &vfio_group_list, next) {
- if (group->groupid == devices[i].group_id) {
- break;
- }
- }
-
- if (!group) {
- if (!vdev->has_pm_reset) {
- error_report("vfio: Cannot reset device %s, "
- "depends on group %d which is not owned.",
- vdev->vbasedev.name, devices[i].group_id);
- }
- ret = -EPERM;
- goto out;
- }
-
- /* Prep dependent devices for reset and clear our marker. */
- QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
- if (!vbasedev_iter->dev->realized ||
- !vfio_pci_from_vfio_device(vbasedev_iter)) {
- continue;
- }
- tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
- if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
- if (single) {
- ret = -EINVAL;
- goto out_single;
- }
- vfio_pci_pre_reset(tmp);
- tmp->vbasedev.needs_reset = false;
- multi = true;
- break;
- }
- }
- }
-
- if (!single && !multi) {
- ret = -EINVAL;
- goto out_single;
- }
-
- /* Determine how many group fds need to be passed */
- count = 0;
- QLIST_FOREACH(group, &vfio_group_list, next) {
- for (i = 0; i < info->count; i++) {
- if (group->groupid == devices[i].group_id) {
- count++;
- break;
- }
- }
+ error_setg_errno(errp, -ret,
+ "Failed to allocate dirty tracking bitmap");
+ return ret;
}
- reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds)));
- reset->argsz = sizeof(*reset) + (count * sizeof(*fds));
- fds = &reset->group_fds[0];
-
- /* Fill in group fds */
- QLIST_FOREACH(group, &vfio_group_list, next) {
- for (i = 0; i < info->count; i++) {
- if (group->groupid == devices[i].group_id) {
- fds[reset->count++] = group->fd;
- break;
- }
- }
+ if (all_device_dirty_tracking) {
+ ret = vfio_container_devices_query_dirty_bitmap(bcontainer, &vbmap, iova, size,
+ errp);
+ } else {
+ ret = vfio_container_iommu_query_dirty_bitmap(bcontainer, &vbmap, iova, size,
+ errp);
}
- /* Bus reset! */
- ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
- g_free(reset);
if (ret) {
- ret = -errno;
+ goto out;
}
- trace_vfio_pci_hot_reset_result(vdev->vbasedev.name,
- ret ? strerror(errno) : "Success");
+ dirty_pages = cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap, ram_addr,
+ vbmap.pages);
+ trace_vfio_container_query_dirty_bitmap(iova, size, vbmap.size, ram_addr,
+ dirty_pages);
out:
- /* Re-enable INTx on affected devices */
- for (i = 0; i < info->count; i++) {
- PCIHostDeviceAddress host;
- VFIOPCIDevice *tmp;
- VFIODevice *vbasedev_iter;
-
- host.domain = devices[i].segment;
- host.bus = devices[i].bus;
- host.slot = PCI_SLOT(devices[i].devfn);
- host.function = PCI_FUNC(devices[i].devfn);
-
- if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
- continue;
- }
-
- QLIST_FOREACH(group, &vfio_group_list, next) {
- if (group->groupid == devices[i].group_id) {
- break;
- }
- }
-
- if (!group) {
- break;
- }
-
- QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
- if (!vbasedev_iter->dev->realized ||
- !vfio_pci_from_vfio_device(vbasedev_iter)) {
- continue;
- }
- tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
- if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
- vfio_pci_post_reset(tmp);
- break;
- }
- }
- }
-out_single:
- if (!single) {
- vfio_pci_post_reset(vdev);
- }
- g_free(info);
+ g_free(vbmap.bitmap);
return ret;
}
-static void vfio_iommu_legacy_class_init(ObjectClass *klass, const void *data)
+static gpointer copy_iova_range(gconstpointer src, gpointer data)
{
- VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);
-
- vioc->setup = vfio_legacy_setup;
- vioc->dma_map = vfio_legacy_dma_map;
- vioc->dma_unmap = vfio_legacy_dma_unmap;
- vioc->attach_device = vfio_legacy_attach_device;
- vioc->detach_device = vfio_legacy_detach_device;
- vioc->set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking;
- vioc->query_dirty_bitmap = vfio_legacy_query_dirty_bitmap;
- vioc->pci_hot_reset = vfio_legacy_pci_hot_reset;
-};
-
-static bool hiod_legacy_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
- Error **errp)
-{
- VFIODevice *vdev = opaque;
+ Range *source = (Range *)src;
+ Range *dest = g_new(Range, 1);
- hiod->name = g_strdup(vdev->name);
- hiod->agent = opaque;
-
- return true;
+ range_set_bounds(dest, range_lob(source), range_upb(source));
+ return dest;
}
-static int hiod_legacy_vfio_get_cap(HostIOMMUDevice *hiod, int cap,
- Error **errp)
+GList *vfio_container_get_iova_ranges(const VFIOContainer *bcontainer)
{
- switch (cap) {
- case HOST_IOMMU_DEVICE_CAP_AW_BITS:
- return vfio_device_get_aw_bits(hiod->agent);
- default:
- error_setg(errp, "%s: unsupported capability %x", hiod->name, cap);
- return -EINVAL;
- }
+ assert(bcontainer);
+ return g_list_copy_deep(bcontainer->iova_ranges, copy_iova_range, NULL);
}
-static GList *
-hiod_legacy_vfio_get_iova_ranges(HostIOMMUDevice *hiod)
+static void vfio_container_instance_finalize(Object *obj)
{
- VFIODevice *vdev = hiod->agent;
+ VFIOContainer *bcontainer = VFIO_IOMMU(obj);
+ VFIOGuestIOMMU *giommu, *tmp;
- g_assert(vdev);
- return vfio_container_get_iova_ranges(vdev->bcontainer);
-}
+ QLIST_SAFE_REMOVE(bcontainer, next);
-static uint64_t
-hiod_legacy_vfio_get_page_size_mask(HostIOMMUDevice *hiod)
-{
- VFIODevice *vdev = hiod->agent;
+ QLIST_FOREACH_SAFE(giommu, &bcontainer->giommu_list, giommu_next, tmp) {
+ memory_region_unregister_iommu_notifier(
+ MEMORY_REGION(giommu->iommu_mr), &giommu->n);
+ QLIST_REMOVE(giommu, giommu_next);
+ g_free(giommu);
+ }
- g_assert(vdev);
- return vfio_container_get_page_size_mask(vdev->bcontainer);
+ g_list_free_full(bcontainer->iova_ranges, g_free);
}
-static void vfio_iommu_legacy_instance_init(Object *obj)
+static void vfio_container_instance_init(Object *obj)
{
- VFIOContainer *container = VFIO_IOMMU_LEGACY(obj);
+ VFIOContainer *bcontainer = VFIO_IOMMU(obj);
- QLIST_INIT(&container->group_list);
+ bcontainer->error = NULL;
+ bcontainer->dirty_pages_supported = false;
+ bcontainer->dma_max_mappings = 0;
+ bcontainer->iova_ranges = NULL;
+ QLIST_INIT(&bcontainer->giommu_list);
+ QLIST_INIT(&bcontainer->vrdl_list);
}
-static void hiod_legacy_vfio_class_init(ObjectClass *oc, const void *data)
-{
- HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc);
-
- hioc->realize = hiod_legacy_vfio_realize;
- hioc->get_cap = hiod_legacy_vfio_get_cap;
- hioc->get_iova_ranges = hiod_legacy_vfio_get_iova_ranges;
- hioc->get_page_size_mask = hiod_legacy_vfio_get_page_size_mask;
-};
-
static const TypeInfo types[] = {
{
- .name = TYPE_VFIO_IOMMU_LEGACY,
- .parent = TYPE_VFIO_IOMMU,
- .instance_init = vfio_iommu_legacy_instance_init,
+ .name = TYPE_VFIO_IOMMU,
+ .parent = TYPE_OBJECT,
+ .instance_init = vfio_container_instance_init,
+ .instance_finalize = vfio_container_instance_finalize,
.instance_size = sizeof(VFIOContainer),
- .class_init = vfio_iommu_legacy_class_init,
- }, {
- .name = TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO,
- .parent = TYPE_HOST_IOMMU_DEVICE,
- .class_init = hiod_legacy_vfio_class_init,
- }
+ .class_size = sizeof(VFIOIOMMUClass),
+ .abstract = true,
+ },
};
DEFINE_TYPES(types)
diff --git a/hw/vfio/cpr-iommufd.c b/hw/vfio/cpr-iommufd.c
index 148a06d..1d70c87 100644
--- a/hw/vfio/cpr-iommufd.c
+++ b/hw/vfio/cpr-iommufd.c
@@ -176,7 +176,7 @@ void vfio_iommufd_cpr_unregister_iommufd(IOMMUFDBackend *be)
bool vfio_iommufd_cpr_register_container(VFIOIOMMUFDContainer *container,
Error **errp)
{
- VFIOContainerBase *bcontainer = &container->bcontainer;
+ VFIOContainer *bcontainer = VFIO_IOMMU(container);
migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier,
vfio_cpr_reboot_notifier,
@@ -189,7 +189,7 @@ bool vfio_iommufd_cpr_register_container(VFIOIOMMUFDContainer *container,
void vfio_iommufd_cpr_unregister_container(VFIOIOMMUFDContainer *container)
{
- VFIOContainerBase *bcontainer = &container->bcontainer;
+ VFIOContainer *bcontainer = VFIO_IOMMU(container);
migration_remove_notifier(&bcontainer->cpr_reboot_notifier);
}
diff --git a/hw/vfio/cpr-legacy.c b/hw/vfio/cpr-legacy.c
index 8f43719..bbf7a0d 100644
--- a/hw/vfio/cpr-legacy.c
+++ b/hw/vfio/cpr-legacy.c
@@ -7,7 +7,7 @@
#include <sys/ioctl.h>
#include <linux/vfio.h>
#include "qemu/osdep.h"
-#include "hw/vfio/vfio-container.h"
+#include "hw/vfio/vfio-container-legacy.h"
#include "hw/vfio/vfio-device.h"
#include "hw/vfio/vfio-listener.h"
#include "migration/blocker.h"
@@ -17,7 +17,8 @@
#include "qapi/error.h"
#include "qemu/error-report.h"
-static bool vfio_dma_unmap_vaddr_all(VFIOContainer *container, Error **errp)
+static bool vfio_dma_unmap_vaddr_all(VFIOLegacyContainer *container,
+ Error **errp)
{
struct vfio_iommu_type1_dma_unmap unmap = {
.argsz = sizeof(unmap),
@@ -37,11 +38,11 @@ static bool vfio_dma_unmap_vaddr_all(VFIOContainer *container, Error **errp)
* Set the new @vaddr for any mappings registered during cpr load.
* The incoming state is cleared thereafter.
*/
-static int vfio_legacy_cpr_dma_map(const VFIOContainerBase *bcontainer,
+static int vfio_legacy_cpr_dma_map(const VFIOContainer *bcontainer,
hwaddr iova, ram_addr_t size, void *vaddr,
bool readonly, MemoryRegion *mr)
{
- const VFIOContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
+ const VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
struct vfio_iommu_type1_dma_map map = {
.argsz = sizeof(map),
@@ -63,12 +64,13 @@ static int vfio_legacy_cpr_dma_map(const VFIOContainerBase *bcontainer,
static void vfio_region_remap(MemoryListener *listener,
MemoryRegionSection *section)
{
- VFIOContainer *container = container_of(listener, VFIOContainer,
- cpr.remap_listener);
+ VFIOLegacyContainer *container = container_of(listener,
+ VFIOLegacyContainer,
+ cpr.remap_listener);
vfio_container_region_add(VFIO_IOMMU(container), section, true);
}
-static bool vfio_cpr_supported(VFIOContainer *container, Error **errp)
+static bool vfio_cpr_supported(VFIOLegacyContainer *container, Error **errp)
{
if (!ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_UPDATE_VADDR)) {
error_setg(errp, "VFIO container does not support VFIO_UPDATE_VADDR");
@@ -85,7 +87,7 @@ static bool vfio_cpr_supported(VFIOContainer *container, Error **errp)
static int vfio_container_pre_save(void *opaque)
{
- VFIOContainer *container = opaque;
+ VFIOLegacyContainer *container = opaque;
Error *local_err = NULL;
if (!vfio_dma_unmap_vaddr_all(container, &local_err)) {
@@ -97,8 +99,8 @@ static int vfio_container_pre_save(void *opaque)
static int vfio_container_post_load(void *opaque, int version_id)
{
- VFIOContainer *container = opaque;
- VFIOContainerBase *bcontainer = VFIO_IOMMU(container);
+ VFIOLegacyContainer *container = opaque;
+ VFIOContainer *bcontainer = VFIO_IOMMU(container);
VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
dma_map_fn saved_dma_map = vioc->dma_map;
Error *local_err = NULL;
@@ -133,9 +135,9 @@ static const VMStateDescription vfio_container_vmstate = {
static int vfio_cpr_fail_notifier(NotifierWithReturn *notifier,
MigrationEvent *e, Error **errp)
{
- VFIOContainer *container =
- container_of(notifier, VFIOContainer, cpr.transfer_notifier);
- VFIOContainerBase *bcontainer = VFIO_IOMMU(container);
+ VFIOLegacyContainer *container =
+ container_of(notifier, VFIOLegacyContainer, cpr.transfer_notifier);
+ VFIOContainer *bcontainer = VFIO_IOMMU(container);
if (e->type != MIG_EVENT_PRECOPY_FAILED) {
return 0;
@@ -165,9 +167,10 @@ static int vfio_cpr_fail_notifier(NotifierWithReturn *notifier,
return 0;
}
-bool vfio_legacy_cpr_register_container(VFIOContainer *container, Error **errp)
+bool vfio_legacy_cpr_register_container(VFIOLegacyContainer *container,
+ Error **errp)
{
- VFIOContainerBase *bcontainer = VFIO_IOMMU(container);
+ VFIOContainer *bcontainer = VFIO_IOMMU(container);
Error **cpr_blocker = &container->cpr.blocker;
migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier,
@@ -189,9 +192,9 @@ bool vfio_legacy_cpr_register_container(VFIOContainer *container, Error **errp)
return true;
}
-void vfio_legacy_cpr_unregister_container(VFIOContainer *container)
+void vfio_legacy_cpr_unregister_container(VFIOLegacyContainer *container)
{
- VFIOContainerBase *bcontainer = VFIO_IOMMU(container);
+ VFIOContainer *bcontainer = VFIO_IOMMU(container);
migration_remove_notifier(&bcontainer->cpr_reboot_notifier);
migrate_del_blocker(&container->cpr.blocker);
@@ -207,7 +210,7 @@ void vfio_legacy_cpr_unregister_container(VFIOContainer *container)
* The giommu already exists. Find it and replay it, which calls
* vfio_legacy_cpr_dma_map further down the stack.
*/
-void vfio_cpr_giommu_remap(VFIOContainerBase *bcontainer,
+void vfio_cpr_giommu_remap(VFIOContainer *bcontainer,
MemoryRegionSection *section)
{
VFIOGuestIOMMU *giommu = NULL;
@@ -232,7 +235,7 @@ void vfio_cpr_giommu_remap(VFIOContainerBase *bcontainer,
* The ram discard listener already exists. Call its populate function
* directly, which calls vfio_legacy_cpr_dma_map.
*/
-bool vfio_cpr_ram_discard_register_listener(VFIOContainerBase *bcontainer,
+bool vfio_cpr_ram_discard_register_listener(VFIOContainer *bcontainer,
MemoryRegionSection *section)
{
VFIORamDiscardListener *vrdl =
@@ -263,7 +266,7 @@ static bool same_device(int fd1, int fd2)
return !fstat(fd1, &st1) && !fstat(fd2, &st2) && st1.st_dev == st2.st_dev;
}
-bool vfio_cpr_container_match(VFIOContainer *container, VFIOGroup *group,
+bool vfio_cpr_container_match(VFIOLegacyContainer *container, VFIOGroup *group,
int fd)
{
if (container->fd == fd) {
diff --git a/hw/vfio/device.c b/hw/vfio/device.c
index 08f12ac..64f8750 100644
--- a/hw/vfio/device.c
+++ b/hw/vfio/device.c
@@ -423,7 +423,7 @@ bool vfio_device_hiod_create_and_realize(VFIODevice *vbasedev,
VFIODevice *vfio_get_vfio_device(Object *obj)
{
if (object_dynamic_cast(obj, TYPE_VFIO_PCI)) {
- return &VFIO_PCI_BASE(obj)->vbasedev;
+ return &VFIO_PCI_DEVICE(obj)->vbasedev;
} else {
return NULL;
}
@@ -460,7 +460,7 @@ void vfio_device_detach(VFIODevice *vbasedev)
VFIO_IOMMU_GET_CLASS(vbasedev->bcontainer)->detach_device(vbasedev);
}
-void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer,
+void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainer *bcontainer,
struct vfio_device_info *info)
{
int i;
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 8c27222..f0ffe23 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -34,36 +34,33 @@
#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO \
TYPE_HOST_IOMMU_DEVICE_IOMMUFD "-vfio"
-static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova,
+static int iommufd_cdev_map(const VFIOContainer *bcontainer, hwaddr iova,
ram_addr_t size, void *vaddr, bool readonly,
MemoryRegion *mr)
{
- const VFIOIOMMUFDContainer *container =
- container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
+ const VFIOIOMMUFDContainer *container = VFIO_IOMMU_IOMMUFD(bcontainer);
return iommufd_backend_map_dma(container->be,
container->ioas_id,
iova, size, vaddr, readonly);
}
-static int iommufd_cdev_map_file(const VFIOContainerBase *bcontainer,
+static int iommufd_cdev_map_file(const VFIOContainer *bcontainer,
hwaddr iova, ram_addr_t size,
int fd, unsigned long start, bool readonly)
{
- const VFIOIOMMUFDContainer *container =
- container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
+ const VFIOIOMMUFDContainer *container = VFIO_IOMMU_IOMMUFD(bcontainer);
return iommufd_backend_map_file_dma(container->be,
container->ioas_id,
iova, size, fd, start, readonly);
}
-static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer,
+static int iommufd_cdev_unmap(const VFIOContainer *bcontainer,
hwaddr iova, ram_addr_t size,
IOMMUTLBEntry *iotlb, bool unmap_all)
{
- const VFIOIOMMUFDContainer *container =
- container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
+ const VFIOIOMMUFDContainer *container = VFIO_IOMMU_IOMMUFD(bcontainer);
/* unmap in halves */
if (unmap_all) {
@@ -159,11 +156,10 @@ static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
return hwpt && hwpt->hwpt_flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
}
-static int iommufd_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
+static int iommufd_set_dirty_page_tracking(const VFIOContainer *bcontainer,
bool start, Error **errp)
{
- const VFIOIOMMUFDContainer *container =
- container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
+ const VFIOIOMMUFDContainer *container = VFIO_IOMMU_IOMMUFD(bcontainer);
VFIOIOASHwpt *hwpt;
QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
@@ -190,13 +186,11 @@ err:
return -EINVAL;
}
-static int iommufd_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
+static int iommufd_query_dirty_bitmap(const VFIOContainer *bcontainer,
VFIOBitmap *vbmap, hwaddr iova,
hwaddr size, Error **errp)
{
- VFIOIOMMUFDContainer *container = container_of(bcontainer,
- VFIOIOMMUFDContainer,
- bcontainer);
+ VFIOIOMMUFDContainer *container = VFIO_IOMMU_IOMMUFD(bcontainer);
unsigned long page_size = qemu_real_host_page_size();
VFIOIOASHwpt *hwpt;
@@ -324,6 +318,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
{
ERRP_GUARD();
IOMMUFDBackend *iommufd = vbasedev->iommufd;
+ VFIOContainer *bcontainer = VFIO_IOMMU(container);
uint32_t type, flags = 0;
uint64_t hw_caps;
VFIOIOASHwpt *hwpt;
@@ -408,9 +403,9 @@ skip_alloc:
vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt);
QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next);
- container->bcontainer.dirty_pages_supported |=
+ bcontainer->dirty_pages_supported |=
vbasedev->iommu_dirty_tracking;
- if (container->bcontainer.dirty_pages_supported &&
+ if (bcontainer->dirty_pages_supported &&
!vbasedev->iommu_dirty_tracking) {
warn_report("IOMMU instance for device %s doesn't support dirty tracking",
vbasedev->name);
@@ -464,7 +459,7 @@ static void iommufd_cdev_detach_container(VFIODevice *vbasedev,
static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container)
{
- VFIOContainerBase *bcontainer = &container->bcontainer;
+ VFIOContainer *bcontainer = VFIO_IOMMU(container);
if (!QLIST_EMPTY(&bcontainer->device_list)) {
return;
@@ -486,7 +481,7 @@ static int iommufd_cdev_ram_block_discard_disable(bool state)
static bool iommufd_cdev_get_info_iova_range(VFIOIOMMUFDContainer *container,
uint32_t ioas_id, Error **errp)
{
- VFIOContainerBase *bcontainer = &container->bcontainer;
+ VFIOContainer *bcontainer = VFIO_IOMMU(container);
g_autofree struct iommu_ioas_iova_ranges *info = NULL;
struct iommu_iova_range *iova_ranges;
int sz, fd = container->be->fd;
@@ -528,7 +523,7 @@ error:
static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
AddressSpace *as, Error **errp)
{
- VFIOContainerBase *bcontainer;
+ VFIOContainer *bcontainer;
VFIOIOMMUFDContainer *container;
VFIOAddressSpace *space;
struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
@@ -559,7 +554,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
/* try to attach to an existing container in this space */
QLIST_FOREACH(bcontainer, &space->containers, next) {
- container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
+ container = VFIO_IOMMU_IOMMUFD(bcontainer);
if (VFIO_IOMMU_GET_CLASS(bcontainer) != iommufd_vioc ||
vbasedev->iommufd != container->be) {
continue;
@@ -609,7 +604,7 @@ skip_ioas_alloc:
QLIST_INIT(&container->hwpt_list);
vbasedev->cpr.ioas_id = ioas_id;
- bcontainer = &container->bcontainer;
+ bcontainer = VFIO_IOMMU(container);
vfio_address_space_insert(space, bcontainer);
if (!iommufd_cdev_attach_container(vbasedev, container, errp)) {
@@ -687,11 +682,10 @@ err_connect_bind:
static void iommufd_cdev_detach(VFIODevice *vbasedev)
{
- VFIOContainerBase *bcontainer = vbasedev->bcontainer;
+ VFIOContainer *bcontainer = vbasedev->bcontainer;
VFIOAddressSpace *space = bcontainer->space;
- VFIOIOMMUFDContainer *container = container_of(bcontainer,
- VFIOIOMMUFDContainer,
- bcontainer);
+ VFIOIOMMUFDContainer *container = VFIO_IOMMU_IOMMUFD(bcontainer);
+
vfio_device_unprepare(vbasedev);
if (!vbasedev->ram_block_discard_allowed) {
diff --git a/hw/vfio/listener.c b/hw/vfio/listener.c
index e093833..3b6f17f 100644
--- a/hw/vfio/listener.c
+++ b/hw/vfio/listener.c
@@ -52,7 +52,7 @@
*/
-static bool vfio_log_sync_needed(const VFIOContainerBase *bcontainer)
+static bool vfio_log_sync_needed(const VFIOContainer *bcontainer)
{
VFIODevice *vbasedev;
@@ -125,7 +125,7 @@ static MemoryRegion *vfio_translate_iotlb(IOMMUTLBEntry *iotlb, hwaddr *xlat_p,
static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
{
VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
- VFIOContainerBase *bcontainer = giommu->bcontainer;
+ VFIOContainer *bcontainer = giommu->bcontainer;
hwaddr iova = iotlb->iova + giommu->iommu_offset;
MemoryRegion *mr;
hwaddr xlat;
@@ -202,7 +202,7 @@ static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl,
{
VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
listener);
- VFIOContainerBase *bcontainer = vrdl->bcontainer;
+ VFIOContainer *bcontainer = vrdl->bcontainer;
const hwaddr size = int128_get64(section->size);
const hwaddr iova = section->offset_within_address_space;
int ret;
@@ -220,7 +220,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
{
VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
listener);
- VFIOContainerBase *bcontainer = vrdl->bcontainer;
+ VFIOContainer *bcontainer = vrdl->bcontainer;
const hwaddr end = section->offset_within_region +
int128_get64(section->size);
hwaddr start, next, iova;
@@ -250,7 +250,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
return 0;
}
-static bool vfio_ram_discard_register_listener(VFIOContainerBase *bcontainer,
+static bool vfio_ram_discard_register_listener(VFIOContainer *bcontainer,
MemoryRegionSection *section,
Error **errp)
{
@@ -328,7 +328,7 @@ static bool vfio_ram_discard_register_listener(VFIOContainerBase *bcontainer,
return true;
}
-static void vfio_ram_discard_unregister_listener(VFIOContainerBase *bcontainer,
+static void vfio_ram_discard_unregister_listener(VFIOContainer *bcontainer,
MemoryRegionSection *section)
{
RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
@@ -396,7 +396,7 @@ static bool vfio_listener_valid_section(MemoryRegionSection *section,
return true;
}
-static bool vfio_get_section_iova_range(VFIOContainerBase *bcontainer,
+static bool vfio_get_section_iova_range(VFIOContainer *bcontainer,
MemoryRegionSection *section,
hwaddr *out_iova, hwaddr *out_end,
Int128 *out_llend)
@@ -423,9 +423,9 @@ static bool vfio_get_section_iova_range(VFIOContainerBase *bcontainer,
static void vfio_listener_begin(MemoryListener *listener)
{
- VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
- listener);
- void (*listener_begin)(VFIOContainerBase *bcontainer);
+ VFIOContainer *bcontainer = container_of(listener, VFIOContainer,
+ listener);
+ void (*listener_begin)(VFIOContainer *bcontainer);
listener_begin = VFIO_IOMMU_GET_CLASS(bcontainer)->listener_begin;
@@ -436,9 +436,9 @@ static void vfio_listener_begin(MemoryListener *listener)
static void vfio_listener_commit(MemoryListener *listener)
{
- VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
- listener);
- void (*listener_commit)(VFIOContainerBase *bcontainer);
+ VFIOContainer *bcontainer = container_of(listener, VFIOContainer,
+ listener);
+ void (*listener_commit)(VFIOContainer *bcontainer);
listener_commit = VFIO_IOMMU_GET_CLASS(bcontainer)->listener_commit;
@@ -460,7 +460,7 @@ static void vfio_device_error_append(VFIODevice *vbasedev, Error **errp)
}
VFIORamDiscardListener *vfio_find_ram_discard_listener(
- VFIOContainerBase *bcontainer, MemoryRegionSection *section)
+ VFIOContainer *bcontainer, MemoryRegionSection *section)
{
VFIORamDiscardListener *vrdl = NULL;
@@ -482,12 +482,12 @@ VFIORamDiscardListener *vfio_find_ram_discard_listener(
static void vfio_listener_region_add(MemoryListener *listener,
MemoryRegionSection *section)
{
- VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
- listener);
+ VFIOContainer *bcontainer = container_of(listener, VFIOContainer,
+ listener);
vfio_container_region_add(bcontainer, section, false);
}
-void vfio_container_region_add(VFIOContainerBase *bcontainer,
+void vfio_container_region_add(VFIOContainer *bcontainer,
MemoryRegionSection *section,
bool cpr_remap)
{
@@ -656,8 +656,8 @@ fail:
static void vfio_listener_region_del(MemoryListener *listener,
MemoryRegionSection *section)
{
- VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
- listener);
+ VFIOContainer *bcontainer = container_of(listener, VFIOContainer,
+ listener);
hwaddr iova, end;
Int128 llend, llsize;
int ret;
@@ -744,13 +744,13 @@ typedef struct VFIODirtyRanges {
} VFIODirtyRanges;
typedef struct VFIODirtyRangesListener {
- VFIOContainerBase *bcontainer;
+ VFIOContainer *bcontainer;
VFIODirtyRanges ranges;
MemoryListener listener;
} VFIODirtyRangesListener;
static bool vfio_section_is_vfio_pci(MemoryRegionSection *section,
- VFIOContainerBase *bcontainer)
+ VFIOContainer *bcontainer)
{
VFIOPCIDevice *pcidev;
VFIODevice *vbasedev;
@@ -835,7 +835,7 @@ static const MemoryListener vfio_dirty_tracking_listener = {
.region_add = vfio_dirty_tracking_update,
};
-static void vfio_dirty_tracking_init(VFIOContainerBase *bcontainer,
+static void vfio_dirty_tracking_init(VFIOContainer *bcontainer,
VFIODirtyRanges *ranges)
{
VFIODirtyRangesListener dirty;
@@ -860,7 +860,7 @@ static void vfio_dirty_tracking_init(VFIOContainerBase *bcontainer,
memory_listener_unregister(&dirty.listener);
}
-static void vfio_devices_dma_logging_stop(VFIOContainerBase *bcontainer)
+static void vfio_devices_dma_logging_stop(VFIOContainer *bcontainer)
{
uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature),
sizeof(uint64_t))] = {};
@@ -889,7 +889,7 @@ static void vfio_devices_dma_logging_stop(VFIOContainerBase *bcontainer)
}
static struct vfio_device_feature *
-vfio_device_feature_dma_logging_start_create(VFIOContainerBase *bcontainer,
+vfio_device_feature_dma_logging_start_create(VFIOContainer *bcontainer,
VFIODirtyRanges *tracking)
{
struct vfio_device_feature *feature;
@@ -962,7 +962,7 @@ static void vfio_device_feature_dma_logging_start_destroy(
g_free(feature);
}
-static bool vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer,
+static bool vfio_devices_dma_logging_start(VFIOContainer *bcontainer,
Error **errp)
{
struct vfio_device_feature *feature;
@@ -1006,8 +1006,8 @@ static bool vfio_listener_log_global_start(MemoryListener *listener,
Error **errp)
{
ERRP_GUARD();
- VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
- listener);
+ VFIOContainer *bcontainer = container_of(listener, VFIOContainer,
+ listener);
bool ret;
if (vfio_container_devices_dirty_tracking_is_supported(bcontainer)) {
@@ -1024,8 +1024,8 @@ static bool vfio_listener_log_global_start(MemoryListener *listener,
static void vfio_listener_log_global_stop(MemoryListener *listener)
{
- VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
- listener);
+ VFIOContainer *bcontainer = container_of(listener, VFIOContainer,
+ listener);
Error *local_err = NULL;
int ret = 0;
@@ -1057,7 +1057,7 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
vfio_giommu_dirty_notifier *gdn = container_of(n,
vfio_giommu_dirty_notifier, n);
VFIOGuestIOMMU *giommu = gdn->giommu;
- VFIOContainerBase *bcontainer = giommu->bcontainer;
+ VFIOContainer *bcontainer = giommu->bcontainer;
hwaddr iova = iotlb->iova + giommu->iommu_offset;
ram_addr_t translated_addr;
Error *local_err = NULL;
@@ -1127,7 +1127,7 @@ static int vfio_ram_discard_query_dirty_bitmap(MemoryRegionSection *section,
}
static int
-vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer,
+vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *bcontainer,
MemoryRegionSection *section)
{
RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
@@ -1143,7 +1143,7 @@ vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer,
&vrdl);
}
-static int vfio_sync_iommu_dirty_bitmap(VFIOContainerBase *bcontainer,
+static int vfio_sync_iommu_dirty_bitmap(VFIOContainer *bcontainer,
MemoryRegionSection *section)
{
VFIOGuestIOMMU *giommu;
@@ -1180,7 +1180,7 @@ static int vfio_sync_iommu_dirty_bitmap(VFIOContainerBase *bcontainer,
return 0;
}
-static int vfio_sync_dirty_bitmap(VFIOContainerBase *bcontainer,
+static int vfio_sync_dirty_bitmap(VFIOContainer *bcontainer,
MemoryRegionSection *section, Error **errp)
{
ram_addr_t ram_addr;
@@ -1209,8 +1209,8 @@ static int vfio_sync_dirty_bitmap(VFIOContainerBase *bcontainer,
static void vfio_listener_log_sync(MemoryListener *listener,
MemoryRegionSection *section)
{
- VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
- listener);
+ VFIOContainer *bcontainer = container_of(listener, VFIOContainer,
+ listener);
int ret;
Error *local_err = NULL;
@@ -1241,7 +1241,7 @@ static const MemoryListener vfio_memory_listener = {
.log_sync = vfio_listener_log_sync,
};
-bool vfio_listener_register(VFIOContainerBase *bcontainer, Error **errp)
+bool vfio_listener_register(VFIOContainer *bcontainer, Error **errp)
{
bcontainer->listener = vfio_memory_listener;
memory_listener_register(&bcontainer->listener, bcontainer->space->as);
@@ -1255,7 +1255,7 @@ bool vfio_listener_register(VFIOContainerBase *bcontainer, Error **errp)
return true;
}
-void vfio_listener_unregister(VFIOContainerBase *bcontainer)
+void vfio_listener_unregister(VFIOContainer *bcontainer)
{
memory_listener_unregister(&bcontainer->listener);
}
diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build
index d3ed3cb..82f6869 100644
--- a/hw/vfio/meson.build
+++ b/hw/vfio/meson.build
@@ -3,8 +3,8 @@
vfio_ss = ss.source_set()
vfio_ss.add(files(
'listener.c',
- 'container-base.c',
'container.c',
+ 'container-legacy.c',
'helpers.c',
))
vfio_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr.c'))
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index c97606d..b5da6af 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -1159,15 +1159,12 @@ void vfio_vga_quirk_exit(VFIOPCIDevice *vdev)
void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev)
{
- int i, j;
+ int i;
for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) {
while (!QLIST_EMPTY(&vdev->vga->region[i].quirks)) {
VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga->region[i].quirks);
QLIST_REMOVE(quirk, next);
- for (j = 0; j < quirk->nr_mem; j++) {
- object_unparent(OBJECT(&quirk->mem[j]));
- }
g_free(quirk->mem);
g_free(quirk->data);
g_free(quirk);
@@ -1207,14 +1204,10 @@ void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr)
void vfio_bar_quirk_finalize(VFIOPCIDevice *vdev, int nr)
{
VFIOBAR *bar = &vdev->bars[nr];
- int i;
while (!QLIST_EMPTY(&bar->quirks)) {
VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks);
QLIST_REMOVE(quirk, next);
- for (i = 0; i < quirk->nr_mem; i++) {
- object_unparent(OBJECT(&quirk->mem[i]));
- }
g_free(quirk->mem);
g_free(quirk->data);
g_free(quirk);
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index d14e96b..5b022da 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -305,7 +305,7 @@ static void vfio_intx_update(VFIOPCIDevice *vdev, PCIINTxRoute *route)
static void vfio_intx_routing_notifier(PCIDevice *pdev)
{
- VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
+ VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(pdev);
PCIINTxRoute route;
if (vdev->interrupt != VFIO_INT_INTx) {
@@ -660,7 +660,7 @@ void vfio_pci_vector_init(VFIOPCIDevice *vdev, int nr)
static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
MSIMessage *msg, IOHandler *handler)
{
- VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
+ VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(pdev);
VFIOMSIVector *vector;
int ret;
bool resizing = !!(vdev->nr_vectors < nr + 1);
@@ -755,7 +755,7 @@ static int vfio_msix_vector_use(PCIDevice *pdev,
static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
{
- VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
+ VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(pdev);
VFIOMSIVector *vector = &vdev->msi_vectors[nr];
trace_vfio_msix_vector_release(vdev->vbasedev.name, nr);
@@ -1346,7 +1346,7 @@ static const MemoryRegionOps vfio_vga_ops = {
*/
static void vfio_sub_page_bar_update_mapping(PCIDevice *pdev, int bar)
{
- VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
+ VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(pdev);
VFIORegion *region = &vdev->bars[bar].region;
MemoryRegion *mmap_mr, *region_mr, *base_mr;
PCIIORegion *r;
@@ -1392,7 +1392,7 @@ static void vfio_sub_page_bar_update_mapping(PCIDevice *pdev, int bar)
*/
uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len)
{
- VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
+ VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(pdev);
VFIODevice *vbasedev = &vdev->vbasedev;
uint32_t emu_bits = 0, emu_val = 0, phys_val = 0, val;
@@ -1426,7 +1426,7 @@ uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len)
void vfio_pci_write_config(PCIDevice *pdev,
uint32_t addr, uint32_t val, int len)
{
- VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
+ VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(pdev);
VFIODevice *vbasedev = &vdev->vbasedev;
uint32_t val_le = cpu_to_le32(val);
int ret;
@@ -2025,7 +2025,6 @@ static void vfio_bars_finalize(VFIOPCIDevice *vdev)
vfio_region_finalize(&bar->region);
if (bar->mr) {
assert(bar->size);
- object_unparent(OBJECT(bar->mr));
g_free(bar->mr);
bar->mr = NULL;
}
@@ -2033,9 +2032,6 @@ static void vfio_bars_finalize(VFIOPCIDevice *vdev)
if (vdev->vga) {
vfio_vga_quirk_finalize(vdev);
- for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) {
- object_unparent(OBJECT(&vdev->vga->region[i].mem));
- }
g_free(vdev->vga);
}
}
@@ -3396,7 +3392,7 @@ bool vfio_pci_interrupt_setup(VFIOPCIDevice *vdev, Error **errp)
static void vfio_pci_realize(PCIDevice *pdev, Error **errp)
{
ERRP_GUARD();
- VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
+ VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(pdev);
VFIODevice *vbasedev = &vdev->vbasedev;
int i;
char uuid[UUID_STR_LEN];
@@ -3554,16 +3550,16 @@ error:
error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name);
}
-static void vfio_instance_finalize(Object *obj)
+static void vfio_pci_finalize(Object *obj)
{
- VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
+ VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(obj);
vfio_pci_put_device(vdev);
}
static void vfio_exitfn(PCIDevice *pdev)
{
- VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
+ VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(pdev);
VFIODevice *vbasedev = &vdev->vbasedev;
vfio_unregister_req_notifier(vdev);
@@ -3587,7 +3583,7 @@ static void vfio_exitfn(PCIDevice *pdev)
static void vfio_pci_reset(DeviceState *dev)
{
- VFIOPCIDevice *vdev = VFIO_PCI_BASE(dev);
+ VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(dev);
/* Do not reset the device during qemu_system_reset prior to cpr load */
if (cpr_is_incoming()) {
@@ -3629,10 +3625,10 @@ post_reset:
vfio_pci_post_reset(vdev);
}
-static void vfio_instance_init(Object *obj)
+static void vfio_pci_init(Object *obj)
{
PCIDevice *pci_dev = PCI_DEVICE(obj);
- VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
+ VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(obj);
VFIODevice *vbasedev = &vdev->vbasedev;
device_add_bootindex_property(obj, &vdev->bootindex,
@@ -3660,7 +3656,7 @@ static void vfio_instance_init(Object *obj)
pci_dev->cap_present |= QEMU_PCI_SKIP_RESET_ON_CPR;
}
-static void vfio_pci_base_dev_class_init(ObjectClass *klass, const void *data)
+static void vfio_pci_device_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass);
@@ -3672,12 +3668,12 @@ static void vfio_pci_base_dev_class_init(ObjectClass *klass, const void *data)
pdc->config_write = vfio_pci_write_config;
}
-static const TypeInfo vfio_pci_base_dev_info = {
- .name = TYPE_VFIO_PCI_BASE,
+static const TypeInfo vfio_pci_device_info = {
+ .name = TYPE_VFIO_PCI_DEVICE,
.parent = TYPE_PCI_DEVICE,
.instance_size = sizeof(VFIOPCIDevice),
.abstract = true,
- .class_init = vfio_pci_base_dev_class_init,
+ .class_init = vfio_pci_device_class_init,
.interfaces = (const InterfaceInfo[]) {
{ INTERFACE_PCIE_DEVICE },
{ INTERFACE_CONVENTIONAL_PCI_DEVICE },
@@ -3687,7 +3683,7 @@ static const TypeInfo vfio_pci_base_dev_info = {
static PropertyInfo vfio_pci_migration_multifd_transfer_prop;
-static const Property vfio_pci_dev_properties[] = {
+static const Property vfio_pci_properties[] = {
DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIOPCIDevice, host),
DEFINE_PROP_UUID_NODEFAULT("vf-token", VFIOPCIDevice, vf_token),
DEFINE_PROP_STRING("sysfsdev", VFIOPCIDevice, vbasedev.sysfsdev),
@@ -3762,18 +3758,18 @@ static const Property vfio_pci_dev_properties[] = {
#ifdef CONFIG_IOMMUFD
static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
{
- VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
+ VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(obj);
vfio_device_set_fd(&vdev->vbasedev, str, errp);
}
#endif
-static void vfio_pci_dev_class_init(ObjectClass *klass, const void *data)
+static void vfio_pci_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass);
device_class_set_legacy_reset(dc, vfio_pci_reset);
- device_class_set_props(dc, vfio_pci_dev_properties);
+ device_class_set_props(dc, vfio_pci_properties);
#ifdef CONFIG_IOMMUFD
object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd);
#endif
@@ -3916,15 +3912,15 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, const void *data)
"multifd channels");
}
-static const TypeInfo vfio_pci_dev_info = {
+static const TypeInfo vfio_pci_info = {
.name = TYPE_VFIO_PCI,
- .parent = TYPE_VFIO_PCI_BASE,
- .class_init = vfio_pci_dev_class_init,
- .instance_init = vfio_instance_init,
- .instance_finalize = vfio_instance_finalize,
+ .parent = TYPE_VFIO_PCI_DEVICE,
+ .class_init = vfio_pci_class_init,
+ .instance_init = vfio_pci_init,
+ .instance_finalize = vfio_pci_finalize,
};
-static const Property vfio_pci_dev_nohotplug_properties[] = {
+static const Property vfio_pci_nohotplug_properties[] = {
DEFINE_PROP_BOOL("ramfb", VFIOPCIDevice, enable_ramfb, false),
DEFINE_PROP_BOOL("use-legacy-x86-rom", VFIOPCIDevice,
use_legacy_x86_rom, false),
@@ -3932,12 +3928,12 @@ static const Property vfio_pci_dev_nohotplug_properties[] = {
ON_OFF_AUTO_AUTO),
};
-static void vfio_pci_nohotplug_dev_class_init(ObjectClass *klass,
+static void vfio_pci_nohotplug_class_init(ObjectClass *klass,
const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
- device_class_set_props(dc, vfio_pci_dev_nohotplug_properties);
+ device_class_set_props(dc, vfio_pci_nohotplug_properties);
dc->hotpluggable = false;
object_class_property_set_description(klass, /* 3.1 */
@@ -3953,11 +3949,11 @@ static void vfio_pci_nohotplug_dev_class_init(ObjectClass *klass,
"Controls loading of a legacy VGA BIOS ROM");
}
-static const TypeInfo vfio_pci_nohotplug_dev_info = {
+static const TypeInfo vfio_pci_nohotplug_info = {
.name = TYPE_VFIO_PCI_NOHOTPLUG,
.parent = TYPE_VFIO_PCI,
.instance_size = sizeof(VFIOPCIDevice),
- .class_init = vfio_pci_nohotplug_dev_class_init,
+ .class_init = vfio_pci_nohotplug_class_init,
};
static void register_vfio_pci_dev_type(void)
@@ -3973,9 +3969,9 @@ static void register_vfio_pci_dev_type(void)
vfio_pci_migration_multifd_transfer_prop = qdev_prop_on_off_auto;
vfio_pci_migration_multifd_transfer_prop.realized_set_allowed = true;
- type_register_static(&vfio_pci_base_dev_info);
- type_register_static(&vfio_pci_dev_info);
- type_register_static(&vfio_pci_nohotplug_dev_info);
+ type_register_static(&vfio_pci_device_info);
+ type_register_static(&vfio_pci_info);
+ type_register_static(&vfio_pci_nohotplug_info);
}
type_init(register_vfio_pci_dev_type)
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index e0aef82..0f78cf9 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -120,7 +120,7 @@ typedef struct VFIOMSIXInfo {
MemoryRegion *pba_region;
} VFIOMSIXInfo;
-OBJECT_DECLARE_SIMPLE_TYPE(VFIOPCIDevice, VFIO_PCI_BASE)
+OBJECT_DECLARE_SIMPLE_TYPE(VFIOPCIDevice, VFIO_PCI_DEVICE)
struct VFIOPCIDevice {
PCIDevice parent_obj;
diff --git a/hw/vfio/region.c b/hw/vfio/region.c
index d04c57d..b165ab0 100644
--- a/hw/vfio/region.c
+++ b/hw/vfio/region.c
@@ -365,12 +365,9 @@ void vfio_region_finalize(VFIORegion *region)
for (i = 0; i < region->nr_mmaps; i++) {
if (region->mmaps[i].mmap) {
munmap(region->mmaps[i].mmap, region->mmaps[i].size);
- object_unparent(OBJECT(&region->mmaps[i].mem));
}
}
- object_unparent(OBJECT(region->mem));
-
g_free(region->mem);
g_free(region->mmaps);
diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
index c41e458..8d9d68d 100644
--- a/hw/vfio/spapr.c
+++ b/hw/vfio/spapr.c
@@ -15,7 +15,7 @@
#include "system/hostmem.h"
#include "system/address-spaces.h"
-#include "hw/vfio/vfio-container.h"
+#include "hw/vfio/vfio-container-legacy.h"
#include "hw/hw.h"
#include "system/ram_addr.h"
#include "qemu/error-report.h"
@@ -30,12 +30,13 @@ typedef struct VFIOHostDMAWindow {
QLIST_ENTRY(VFIOHostDMAWindow) hostwin_next;
} VFIOHostDMAWindow;
-typedef struct VFIOSpaprContainer {
- VFIOContainer container;
+struct VFIOSpaprContainer {
+ VFIOLegacyContainer parent_obj;
+
MemoryListener prereg_listener;
QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
unsigned int levels;
-} VFIOSpaprContainer;
+};
OBJECT_DECLARE_SIMPLE_TYPE(VFIOSpaprContainer, VFIO_IOMMU_SPAPR);
@@ -61,8 +62,8 @@ static void vfio_prereg_listener_region_add(MemoryListener *listener,
{
VFIOSpaprContainer *scontainer = container_of(listener, VFIOSpaprContainer,
prereg_listener);
- VFIOContainer *container = &scontainer->container;
- VFIOContainerBase *bcontainer = VFIO_IOMMU(container);
+ VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(scontainer);
+ VFIOContainer *bcontainer = VFIO_IOMMU(container);
const hwaddr gpa = section->offset_within_address_space;
hwaddr end;
int ret;
@@ -121,7 +122,7 @@ static void vfio_prereg_listener_region_del(MemoryListener *listener,
{
VFIOSpaprContainer *scontainer = container_of(listener, VFIOSpaprContainer,
prereg_listener);
- VFIOContainer *container = &scontainer->container;
+ VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(scontainer);
const hwaddr gpa = section->offset_within_address_space;
hwaddr end;
int ret;
@@ -218,7 +219,7 @@ static VFIOHostDMAWindow *vfio_find_hostwin(VFIOSpaprContainer *container,
return hostwin_found ? hostwin : NULL;
}
-static int vfio_spapr_remove_window(VFIOContainer *container,
+static int vfio_spapr_remove_window(VFIOLegacyContainer *container,
hwaddr offset_within_address_space)
{
struct vfio_iommu_spapr_tce_remove remove = {
@@ -239,14 +240,13 @@ static int vfio_spapr_remove_window(VFIOContainer *container,
return 0;
}
-static bool vfio_spapr_create_window(VFIOContainer *container,
+static bool vfio_spapr_create_window(VFIOLegacyContainer *container,
MemoryRegionSection *section,
hwaddr *pgsize, Error **errp)
{
int ret = 0;
- VFIOContainerBase *bcontainer = VFIO_IOMMU(container);
- VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer,
- container);
+ VFIOContainer *bcontainer = VFIO_IOMMU(container);
+ VFIOSpaprContainer *scontainer = VFIO_IOMMU_SPAPR(bcontainer);
IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr);
uint64_t pagesize = memory_region_iommu_get_min_page_size(iommu_mr), pgmask;
unsigned entries, bits_total, bits_per_level, max_levels, ddw_levels;
@@ -348,13 +348,12 @@ static bool vfio_spapr_create_window(VFIOContainer *container,
}
static bool
-vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer,
+vfio_spapr_container_add_section_window(VFIOContainer *bcontainer,
MemoryRegionSection *section,
Error **errp)
{
- VFIOContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
- VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer,
- container);
+ VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
+ VFIOSpaprContainer *scontainer = VFIO_IOMMU_SPAPR(container);
VFIOHostDMAWindow *hostwin;
hwaddr pgsize = 0;
int ret;
@@ -439,12 +438,11 @@ vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer,
}
static void
-vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer,
+vfio_spapr_container_del_section_window(VFIOContainer *bcontainer,
MemoryRegionSection *section)
{
- VFIOContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
- VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer,
- container);
+ VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
+ VFIOSpaprContainer *scontainer = VFIO_IOMMU_SPAPR(container);
if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) {
return;
@@ -461,11 +459,10 @@ vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer,
}
}
-static void vfio_spapr_container_release(VFIOContainerBase *bcontainer)
+static void vfio_spapr_container_release(VFIOContainer *bcontainer)
{
- VFIOContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
- VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer,
- container);
+ VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
+ VFIOSpaprContainer *scontainer = VFIO_IOMMU_SPAPR(container);
VFIOHostDMAWindow *hostwin, *next;
if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
@@ -478,12 +475,11 @@ static void vfio_spapr_container_release(VFIOContainerBase *bcontainer)
}
}
-static bool vfio_spapr_container_setup(VFIOContainerBase *bcontainer,
+static bool vfio_spapr_container_setup(VFIOContainer *bcontainer,
Error **errp)
{
- VFIOContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
- VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer,
- container);
+ VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
+ VFIOSpaprContainer *scontainer = VFIO_IOMMU_SPAPR(container);
struct vfio_iommu_spapr_tce_info info;
bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU;
int ret, fd = container->fd;
diff --git a/hw/vfio/types.h b/hw/vfio/types.h
index c19334f..5482d90 100644
--- a/hw/vfio/types.h
+++ b/hw/vfio/types.h
@@ -9,11 +9,11 @@
#define HW_VFIO_VFIO_TYPES_H
/*
- * TYPE_VFIO_PCI_BASE is an abstract type used to share code
+ * TYPE_VFIO_PCI_DEVICE is an abstract type used to share code
* between VFIO implementations that use a kernel driver
* with those that use user sockets.
*/
-#define TYPE_VFIO_PCI_BASE "vfio-pci-base"
+#define TYPE_VFIO_PCI_DEVICE "vfio-pci-device"
#define TYPE_VFIO_PCI "vfio-pci"
/* TYPE_VFIO_PCI shares struct VFIOPCIDevice. */
diff --git a/hw/vfio/vfio-iommufd.h b/hw/vfio/vfio-iommufd.h
index 07ea0f4..6b28e1f 100644
--- a/hw/vfio/vfio-iommufd.h
+++ b/hw/vfio/vfio-iommufd.h
@@ -9,7 +9,7 @@
#ifndef HW_VFIO_VFIO_IOMMUFD_H
#define HW_VFIO_VFIO_IOMMUFD_H
-#include "hw/vfio/vfio-container-base.h"
+#include "hw/vfio/vfio-container.h"
typedef struct VFIODevice VFIODevice;
@@ -22,12 +22,13 @@ typedef struct VFIOIOASHwpt {
typedef struct IOMMUFDBackend IOMMUFDBackend;
-typedef struct VFIOIOMMUFDContainer {
- VFIOContainerBase bcontainer;
+struct VFIOIOMMUFDContainer {
+ VFIOContainer parent_obj;
+
IOMMUFDBackend *be;
uint32_t ioas_id;
QLIST_HEAD(, VFIOIOASHwpt) hwpt_list;
-} VFIOIOMMUFDContainer;
+};
OBJECT_DECLARE_SIMPLE_TYPE(VFIOIOMMUFDContainer, VFIO_IOMMU_IOMMUFD);
diff --git a/hw/vfio/vfio-listener.h b/hw/vfio/vfio-listener.h
index eb69ddd..a90674c 100644
--- a/hw/vfio/vfio-listener.h
+++ b/hw/vfio/vfio-listener.h
@@ -9,7 +9,7 @@
#ifndef HW_VFIO_VFIO_LISTENER_H
#define HW_VFIO_VFIO_LISTENER_H
-bool vfio_listener_register(VFIOContainerBase *bcontainer, Error **errp);
-void vfio_listener_unregister(VFIOContainerBase *bcontainer);
+bool vfio_listener_register(VFIOContainer *bcontainer, Error **errp);
+void vfio_listener_unregister(VFIOContainer *bcontainer);
#endif /* HW_VFIO_VFIO_LISTENER_H */
diff --git a/hw/xen/xen_pt_msi.c b/hw/xen/xen_pt_msi.c
index 09cca4e..e9ba173 100644
--- a/hw/xen/xen_pt_msi.c
+++ b/hw/xen/xen_pt_msi.c
@@ -637,14 +637,5 @@ void xen_pt_msix_unmap(XenPCIPassthroughState *s)
void xen_pt_msix_delete(XenPCIPassthroughState *s)
{
- XenPTMSIX *msix = s->msix;
-
- if (!msix) {
- return;
- }
-
- object_unparent(OBJECT(&msix->mmio));
-
- g_free(s->msix);
- s->msix = NULL;
+ g_clear_pointer(&s->msix, g_free);
}
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
deleted file mode 100644
index acbd48a..0000000
--- a/include/hw/vfio/vfio-container-base.h
+++ /dev/null
@@ -1,279 +0,0 @@
-/*
- * VFIO BASE CONTAINER
- *
- * Copyright (C) 2023 Intel Corporation.
- * Copyright Red Hat, Inc. 2023
- *
- * Authors: Yi Liu <yi.l.liu@intel.com>
- * Eric Auger <eric.auger@redhat.com>
- *
- * SPDX-License-Identifier: GPL-2.0-or-later
- */
-
-#ifndef HW_VFIO_VFIO_CONTAINER_BASE_H
-#define HW_VFIO_VFIO_CONTAINER_BASE_H
-
-#include "system/memory.h"
-
-typedef struct VFIODevice VFIODevice;
-typedef struct VFIOIOMMUClass VFIOIOMMUClass;
-
-typedef struct {
- unsigned long *bitmap;
- hwaddr size;
- hwaddr pages;
-} VFIOBitmap;
-
-typedef struct VFIOAddressSpace {
- AddressSpace *as;
- QLIST_HEAD(, VFIOContainerBase) containers;
- QLIST_ENTRY(VFIOAddressSpace) list;
-} VFIOAddressSpace;
-
-/*
- * This is the base object for vfio container backends
- */
-struct VFIOContainerBase {
- Object parent_obj;
-
- VFIOAddressSpace *space;
- MemoryListener listener;
- Error *error;
- bool initialized;
- uint64_t dirty_pgsizes;
- uint64_t max_dirty_bitmap_size;
- unsigned long pgsizes;
- unsigned int dma_max_mappings;
- bool dirty_pages_supported;
- bool dirty_pages_started; /* Protected by BQL */
- QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
- QLIST_HEAD(, VFIORamDiscardListener) vrdl_list;
- QLIST_ENTRY(VFIOContainerBase) next;
- QLIST_HEAD(, VFIODevice) device_list;
- GList *iova_ranges;
- NotifierWithReturn cpr_reboot_notifier;
-};
-
-#define TYPE_VFIO_IOMMU "vfio-iommu"
-OBJECT_DECLARE_TYPE(VFIOContainerBase, VFIOIOMMUClass, VFIO_IOMMU)
-
-typedef struct VFIOGuestIOMMU {
- VFIOContainerBase *bcontainer;
- IOMMUMemoryRegion *iommu_mr;
- hwaddr iommu_offset;
- IOMMUNotifier n;
- QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
-} VFIOGuestIOMMU;
-
-typedef struct VFIORamDiscardListener {
- VFIOContainerBase *bcontainer;
- MemoryRegion *mr;
- hwaddr offset_within_address_space;
- hwaddr size;
- uint64_t granularity;
- RamDiscardListener listener;
- QLIST_ENTRY(VFIORamDiscardListener) next;
-} VFIORamDiscardListener;
-
-VFIOAddressSpace *vfio_address_space_get(AddressSpace *as);
-void vfio_address_space_put(VFIOAddressSpace *space);
-void vfio_address_space_insert(VFIOAddressSpace *space,
- VFIOContainerBase *bcontainer);
-
-int vfio_container_dma_map(VFIOContainerBase *bcontainer,
- hwaddr iova, ram_addr_t size,
- void *vaddr, bool readonly, MemoryRegion *mr);
-int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
- hwaddr iova, ram_addr_t size,
- IOMMUTLBEntry *iotlb, bool unmap_all);
-bool vfio_container_add_section_window(VFIOContainerBase *bcontainer,
- MemoryRegionSection *section,
- Error **errp);
-void vfio_container_del_section_window(VFIOContainerBase *bcontainer,
- MemoryRegionSection *section);
-int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer,
- bool start, Error **errp);
-bool vfio_container_dirty_tracking_is_started(
- const VFIOContainerBase *bcontainer);
-bool vfio_container_devices_dirty_tracking_is_supported(
- const VFIOContainerBase *bcontainer);
-int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
- uint64_t iova, uint64_t size, ram_addr_t ram_addr, Error **errp);
-
-GList *vfio_container_get_iova_ranges(const VFIOContainerBase *bcontainer);
-
-static inline uint64_t
-vfio_container_get_page_size_mask(const VFIOContainerBase *bcontainer)
-{
- assert(bcontainer);
- return bcontainer->pgsizes;
-}
-
-#define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy"
-#define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr"
-#define TYPE_VFIO_IOMMU_IOMMUFD TYPE_VFIO_IOMMU "-iommufd"
-#define TYPE_VFIO_IOMMU_USER TYPE_VFIO_IOMMU "-user"
-
-struct VFIOIOMMUClass {
- ObjectClass parent_class;
-
- /**
- * @setup
- *
- * Perform basic setup of the container, including configuring IOMMU
- * capabilities, IOVA ranges, supported page sizes, etc.
- *
- * @bcontainer: #VFIOContainerBase
- * @errp: pointer to Error*, to store an error if it happens.
- *
- * Returns true to indicate success and false for error.
- */
- bool (*setup)(VFIOContainerBase *bcontainer, Error **errp);
-
- /**
- * @listener_begin
- *
- * Called at the beginning of an address space update transaction.
- * See #MemoryListener.
- *
- * @bcontainer: #VFIOContainerBase
- */
- void (*listener_begin)(VFIOContainerBase *bcontainer);
-
- /**
- * @listener_commit
- *
- * Called at the end of an address space update transaction,
- * See #MemoryListener.
- *
- * @bcontainer: #VFIOContainerBase
- */
- void (*listener_commit)(VFIOContainerBase *bcontainer);
-
- /**
- * @dma_map
- *
- * Map an address range into the container. Note that the memory region is
- * referenced within an RCU read lock region across this call.
- *
- * @bcontainer: #VFIOContainerBase to use
- * @iova: start address to map
- * @size: size of the range to map
- * @vaddr: process virtual address of mapping
- * @readonly: true if mapping should be readonly
- * @mr: the memory region for this mapping
- *
- * Returns 0 to indicate success and -errno otherwise.
- */
- int (*dma_map)(const VFIOContainerBase *bcontainer,
- hwaddr iova, ram_addr_t size,
- void *vaddr, bool readonly, MemoryRegion *mr);
- /**
- * @dma_map_file
- *
- * Map a file range for the container.
- *
- * @bcontainer: #VFIOContainerBase to use for map
- * @iova: start address to map
- * @size: size of the range to map
- * @fd: descriptor of the file to map
- * @start: starting file offset of the range to map
- * @readonly: map read only if true
- */
- int (*dma_map_file)(const VFIOContainerBase *bcontainer,
- hwaddr iova, ram_addr_t size,
- int fd, unsigned long start, bool readonly);
- /**
- * @dma_unmap
- *
- * Unmap an address range from the container.
- *
- * @bcontainer: #VFIOContainerBase to use for unmap
- * @iova: start address to unmap
- * @size: size of the range to unmap
- * @iotlb: The IOMMU TLB mapping entry (or NULL)
- * @unmap_all: if set, unmap the entire address space
- *
- * Returns 0 to indicate success and -errno otherwise.
- */
- int (*dma_unmap)(const VFIOContainerBase *bcontainer,
- hwaddr iova, ram_addr_t size,
- IOMMUTLBEntry *iotlb, bool unmap_all);
-
-
- /**
- * @attach_device
- *
- * Associate the given device with a container and do some related
- * initialization of the device context.
- *
- * @name: name of the device
- * @vbasedev: the device
- * @as: address space to use
- * @errp: pointer to Error*, to store an error if it happens.
- *
- * Returns true to indicate success and false for error.
- */
- bool (*attach_device)(const char *name, VFIODevice *vbasedev,
- AddressSpace *as, Error **errp);
-
- /*
- * @detach_device
- *
- * Detach the given device from its container and clean up any necessary
- * state.
- *
- * @vbasedev: the device to disassociate
- */
- void (*detach_device)(VFIODevice *vbasedev);
-
- /* migration feature */
-
- /**
- * @set_dirty_page_tracking
- *
- * Start or stop dirty pages tracking on VFIO container
- *
- * @bcontainer: #VFIOContainerBase on which to de/activate dirty
- * page tracking
- * @start: indicates whether to start or stop dirty pages tracking
- * @errp: pointer to Error*, to store an error if it happens.
- *
- * Returns zero to indicate success and negative for error.
- */
- int (*set_dirty_page_tracking)(const VFIOContainerBase *bcontainer,
- bool start, Error **errp);
- /**
- * @query_dirty_bitmap
- *
- * Get bitmap of dirty pages from container
- *
- * @bcontainer: #VFIOContainerBase from which to get dirty pages
- * @vbmap: #VFIOBitmap internal bitmap structure
- * @iova: iova base address
- * @size: size of iova range
- * @errp: pointer to Error*, to store an error if it happens.
- *
- * Returns zero to indicate success and negative for error.
- */
- int (*query_dirty_bitmap)(const VFIOContainerBase *bcontainer,
- VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp);
- /* PCI specific */
- int (*pci_hot_reset)(VFIODevice *vbasedev, bool single);
-
- /* SPAPR specific */
- bool (*add_window)(VFIOContainerBase *bcontainer,
- MemoryRegionSection *section,
- Error **errp);
- void (*del_window)(VFIOContainerBase *bcontainer,
- MemoryRegionSection *section);
- void (*release)(VFIOContainerBase *bcontainer);
-};
-
-VFIORamDiscardListener *vfio_find_ram_discard_listener(
- VFIOContainerBase *bcontainer, MemoryRegionSection *section);
-
-void vfio_container_region_add(VFIOContainerBase *bcontainer,
- MemoryRegionSection *section, bool cpr_remap);
-
-#endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */
diff --git a/include/hw/vfio/vfio-container-legacy.h b/include/hw/vfio/vfio-container-legacy.h
new file mode 100644
index 0000000..74a72df
--- /dev/null
+++ b/include/hw/vfio/vfio-container-legacy.h
@@ -0,0 +1,39 @@
+/*
+ * VFIO container
+ *
+ * Copyright Red Hat, Inc. 2025
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_VFIO_CONTAINER_LEGACY_H
+#define HW_VFIO_CONTAINER_LEGACY_H
+
+#include "hw/vfio/vfio-container.h"
+#include "hw/vfio/vfio-cpr.h"
+
+typedef struct VFIOLegacyContainer VFIOLegacyContainer;
+typedef struct VFIODevice VFIODevice;
+
+typedef struct VFIOGroup {
+ int fd;
+ int groupid;
+ VFIOLegacyContainer *container;
+ QLIST_HEAD(, VFIODevice) device_list;
+ QLIST_ENTRY(VFIOGroup) next;
+ QLIST_ENTRY(VFIOGroup) container_next;
+ bool ram_block_discard_allowed;
+} VFIOGroup;
+
+struct VFIOLegacyContainer {
+ VFIOContainer parent_obj;
+
+ int fd; /* /dev/vfio/vfio, empowered by the attached groups */
+ unsigned iommu_type;
+ QLIST_HEAD(, VFIOGroup) group_list;
+ VFIOContainerCPR cpr;
+};
+
+OBJECT_DECLARE_SIMPLE_TYPE(VFIOLegacyContainer, VFIO_IOMMU_LEGACY);
+
+#endif /* HW_VFIO_CONTAINER_LEGACY_H */
diff --git a/include/hw/vfio/vfio-container.h b/include/hw/vfio/vfio-container.h
index 240f566..b8fb2b8 100644
--- a/include/hw/vfio/vfio-container.h
+++ b/include/hw/vfio/vfio-container.h
@@ -1,39 +1,279 @@
/*
- * VFIO container
+ * VFIO BASE CONTAINER
*
- * Copyright Red Hat, Inc. 2025
+ * Copyright (C) 2023 Intel Corporation.
+ * Copyright Red Hat, Inc. 2023
+ *
+ * Authors: Yi Liu <yi.l.liu@intel.com>
+ * Eric Auger <eric.auger@redhat.com>
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
-#ifndef HW_VFIO_CONTAINER_H
-#define HW_VFIO_CONTAINER_H
+#ifndef HW_VFIO_VFIO_CONTAINER_H
+#define HW_VFIO_VFIO_CONTAINER_H
-#include "hw/vfio/vfio-container-base.h"
-#include "hw/vfio/vfio-cpr.h"
+#include "system/memory.h"
-typedef struct VFIOContainer VFIOContainer;
typedef struct VFIODevice VFIODevice;
+typedef struct VFIOIOMMUClass VFIOIOMMUClass;
-typedef struct VFIOGroup {
- int fd;
- int groupid;
- VFIOContainer *container;
- QLIST_HEAD(, VFIODevice) device_list;
- QLIST_ENTRY(VFIOGroup) next;
- QLIST_ENTRY(VFIOGroup) container_next;
- bool ram_block_discard_allowed;
-} VFIOGroup;
+typedef struct {
+ unsigned long *bitmap;
+ hwaddr size;
+ hwaddr pages;
+} VFIOBitmap;
+typedef struct VFIOAddressSpace {
+ AddressSpace *as;
+ QLIST_HEAD(, VFIOContainer) containers;
+ QLIST_ENTRY(VFIOAddressSpace) list;
+} VFIOAddressSpace;
+
+/*
+ * This is the base object for vfio container backends
+ */
struct VFIOContainer {
- VFIOContainerBase parent_obj;
+ Object parent_obj;
- int fd; /* /dev/vfio/vfio, empowered by the attached groups */
- unsigned iommu_type;
- QLIST_HEAD(, VFIOGroup) group_list;
- VFIOContainerCPR cpr;
+ VFIOAddressSpace *space;
+ MemoryListener listener;
+ Error *error;
+ bool initialized;
+ uint64_t dirty_pgsizes;
+ uint64_t max_dirty_bitmap_size;
+ unsigned long pgsizes;
+ unsigned int dma_max_mappings;
+ bool dirty_pages_supported;
+ bool dirty_pages_started; /* Protected by BQL */
+ QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
+ QLIST_HEAD(, VFIORamDiscardListener) vrdl_list;
+ QLIST_ENTRY(VFIOContainer) next;
+ QLIST_HEAD(, VFIODevice) device_list;
+ GList *iova_ranges;
+ NotifierWithReturn cpr_reboot_notifier;
};
-OBJECT_DECLARE_SIMPLE_TYPE(VFIOContainer, VFIO_IOMMU_LEGACY);
+#define TYPE_VFIO_IOMMU "vfio-iommu"
+OBJECT_DECLARE_TYPE(VFIOContainer, VFIOIOMMUClass, VFIO_IOMMU)
+
+typedef struct VFIOGuestIOMMU {
+ VFIOContainer *bcontainer;
+ IOMMUMemoryRegion *iommu_mr;
+ hwaddr iommu_offset;
+ IOMMUNotifier n;
+ QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
+} VFIOGuestIOMMU;
+
+typedef struct VFIORamDiscardListener {
+ VFIOContainer *bcontainer;
+ MemoryRegion *mr;
+ hwaddr offset_within_address_space;
+ hwaddr size;
+ uint64_t granularity;
+ RamDiscardListener listener;
+ QLIST_ENTRY(VFIORamDiscardListener) next;
+} VFIORamDiscardListener;
+
+VFIOAddressSpace *vfio_address_space_get(AddressSpace *as);
+void vfio_address_space_put(VFIOAddressSpace *space);
+void vfio_address_space_insert(VFIOAddressSpace *space,
+ VFIOContainer *bcontainer);
+
+int vfio_container_dma_map(VFIOContainer *bcontainer,
+ hwaddr iova, ram_addr_t size,
+ void *vaddr, bool readonly, MemoryRegion *mr);
+int vfio_container_dma_unmap(VFIOContainer *bcontainer,
+ hwaddr iova, ram_addr_t size,
+ IOMMUTLBEntry *iotlb, bool unmap_all);
+bool vfio_container_add_section_window(VFIOContainer *bcontainer,
+ MemoryRegionSection *section,
+ Error **errp);
+void vfio_container_del_section_window(VFIOContainer *bcontainer,
+ MemoryRegionSection *section);
+int vfio_container_set_dirty_page_tracking(VFIOContainer *bcontainer,
+ bool start, Error **errp);
+bool vfio_container_dirty_tracking_is_started(
+ const VFIOContainer *bcontainer);
+bool vfio_container_devices_dirty_tracking_is_supported(
+ const VFIOContainer *bcontainer);
+int vfio_container_query_dirty_bitmap(const VFIOContainer *bcontainer,
+ uint64_t iova, uint64_t size, ram_addr_t ram_addr, Error **errp);
+
+GList *vfio_container_get_iova_ranges(const VFIOContainer *bcontainer);
+
+static inline uint64_t
+vfio_container_get_page_size_mask(const VFIOContainer *bcontainer)
+{
+ assert(bcontainer);
+ return bcontainer->pgsizes;
+}
+
+#define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy"
+#define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr"
+#define TYPE_VFIO_IOMMU_IOMMUFD TYPE_VFIO_IOMMU "-iommufd"
+#define TYPE_VFIO_IOMMU_USER TYPE_VFIO_IOMMU "-user"
+
+struct VFIOIOMMUClass {
+ ObjectClass parent_class;
+
+ /**
+ * @setup
+ *
+ * Perform basic setup of the container, including configuring IOMMU
+ * capabilities, IOVA ranges, supported page sizes, etc.
+ *
+ * @bcontainer: #VFIOContainer
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Returns true to indicate success and false for error.
+ */
+ bool (*setup)(VFIOContainer *bcontainer, Error **errp);
+
+ /**
+ * @listener_begin
+ *
+ * Called at the beginning of an address space update transaction.
+ * See #MemoryListener.
+ *
+ * @bcontainer: #VFIOContainer
+ */
+ void (*listener_begin)(VFIOContainer *bcontainer);
+
+ /**
+ * @listener_commit
+ *
+ * Called at the end of an address space update transaction,
+ * See #MemoryListener.
+ *
+ * @bcontainer: #VFIOContainer
+ */
+ void (*listener_commit)(VFIOContainer *bcontainer);
+
+ /**
+ * @dma_map
+ *
+ * Map an address range into the container. Note that the memory region is
+ * referenced within an RCU read lock region across this call.
+ *
+ * @bcontainer: #VFIOContainer to use
+ * @iova: start address to map
+ * @size: size of the range to map
+ * @vaddr: process virtual address of mapping
+ * @readonly: true if mapping should be readonly
+ * @mr: the memory region for this mapping
+ *
+ * Returns 0 to indicate success and -errno otherwise.
+ */
+ int (*dma_map)(const VFIOContainer *bcontainer,
+ hwaddr iova, ram_addr_t size,
+ void *vaddr, bool readonly, MemoryRegion *mr);
+ /**
+ * @dma_map_file
+ *
+ * Map a file range for the container.
+ *
+ * @bcontainer: #VFIOContainer to use for map
+ * @iova: start address to map
+ * @size: size of the range to map
+ * @fd: descriptor of the file to map
+ * @start: starting file offset of the range to map
+ * @readonly: map read only if true
+ */
+ int (*dma_map_file)(const VFIOContainer *bcontainer,
+ hwaddr iova, ram_addr_t size,
+ int fd, unsigned long start, bool readonly);
+ /**
+ * @dma_unmap
+ *
+ * Unmap an address range from the container.
+ *
+ * @bcontainer: #VFIOContainer to use for unmap
+ * @iova: start address to unmap
+ * @size: size of the range to unmap
+ * @iotlb: The IOMMU TLB mapping entry (or NULL)
+ * @unmap_all: if set, unmap the entire address space
+ *
+ * Returns 0 to indicate success and -errno otherwise.
+ */
+ int (*dma_unmap)(const VFIOContainer *bcontainer,
+ hwaddr iova, ram_addr_t size,
+ IOMMUTLBEntry *iotlb, bool unmap_all);
+
+
+ /**
+ * @attach_device
+ *
+ * Associate the given device with a container and do some related
+ * initialization of the device context.
+ *
+ * @name: name of the device
+ * @vbasedev: the device
+ * @as: address space to use
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Returns true to indicate success and false for error.
+ */
+ bool (*attach_device)(const char *name, VFIODevice *vbasedev,
+ AddressSpace *as, Error **errp);
+
+ /*
+ * @detach_device
+ *
+ * Detach the given device from its container and clean up any necessary
+ * state.
+ *
+ * @vbasedev: the device to disassociate
+ */
+ void (*detach_device)(VFIODevice *vbasedev);
+
+ /* migration feature */
+
+ /**
+ * @set_dirty_page_tracking
+ *
+ * Start or stop dirty pages tracking on VFIO container
+ *
+ * @bcontainer: #VFIOContainer on which to de/activate dirty
+ * page tracking
+ * @start: indicates whether to start or stop dirty pages tracking
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Returns zero to indicate success and negative for error.
+ */
+ int (*set_dirty_page_tracking)(const VFIOContainer *bcontainer,
+ bool start, Error **errp);
+ /**
+ * @query_dirty_bitmap
+ *
+ * Get bitmap of dirty pages from container
+ *
+ * @bcontainer: #VFIOContainer from which to get dirty pages
+ * @vbmap: #VFIOBitmap internal bitmap structure
+ * @iova: iova base address
+ * @size: size of iova range
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Returns zero to indicate success and negative for error.
+ */
+ int (*query_dirty_bitmap)(const VFIOContainer *bcontainer,
+ VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp);
+ /* PCI specific */
+ int (*pci_hot_reset)(VFIODevice *vbasedev, bool single);
+
+ /* SPAPR specific */
+ bool (*add_window)(VFIOContainer *bcontainer,
+ MemoryRegionSection *section,
+ Error **errp);
+ void (*del_window)(VFIOContainer *bcontainer,
+ MemoryRegionSection *section);
+ void (*release)(VFIOContainer *bcontainer);
+};
+
+VFIORamDiscardListener *vfio_find_ram_discard_listener(
+ VFIOContainer *bcontainer, MemoryRegionSection *section);
+
+void vfio_container_region_add(VFIOContainer *bcontainer,
+ MemoryRegionSection *section, bool cpr_remap);
-#endif /* HW_VFIO_CONTAINER_H */
+#endif /* HW_VFIO_VFIO_CONTAINER_H */
diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h
index d37daff..26ee0c4 100644
--- a/include/hw/vfio/vfio-cpr.h
+++ b/include/hw/vfio/vfio-cpr.h
@@ -12,15 +12,15 @@
#include "migration/misc.h"
#include "system/memory.h"
+struct VFIOLegacyContainer;
struct VFIOContainer;
-struct VFIOContainerBase;
struct VFIOGroup;
struct VFIODevice;
struct VFIOPCIDevice;
struct VFIOIOMMUFDContainer;
struct IOMMUFDBackend;
-typedef int (*dma_map_fn)(const struct VFIOContainerBase *bcontainer,
+typedef int (*dma_map_fn)(const struct VFIOContainer *bcontainer,
hwaddr iova, ram_addr_t size, void *vaddr,
bool readonly, MemoryRegion *mr);
@@ -42,9 +42,10 @@ typedef struct VFIOPCICPR {
NotifierWithReturn transfer_notifier;
} VFIOPCICPR;
-bool vfio_legacy_cpr_register_container(struct VFIOContainer *container,
+bool vfio_legacy_cpr_register_container(struct VFIOLegacyContainer *container,
Error **errp);
-void vfio_legacy_cpr_unregister_container(struct VFIOContainer *container);
+void vfio_legacy_cpr_unregister_container(
+ struct VFIOLegacyContainer *container);
int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier, MigrationEvent *e,
Error **errp);
@@ -61,14 +62,14 @@ void vfio_cpr_load_device(struct VFIODevice *vbasedev);
int vfio_cpr_group_get_device_fd(int d, const char *name);
-bool vfio_cpr_container_match(struct VFIOContainer *container,
+bool vfio_cpr_container_match(struct VFIOLegacyContainer *container,
struct VFIOGroup *group, int fd);
-void vfio_cpr_giommu_remap(struct VFIOContainerBase *bcontainer,
+void vfio_cpr_giommu_remap(struct VFIOContainer *bcontainer,
MemoryRegionSection *section);
bool vfio_cpr_ram_discard_register_listener(
- struct VFIOContainerBase *bcontainer, MemoryRegionSection *section);
+ struct VFIOContainer *bcontainer, MemoryRegionSection *section);
void vfio_cpr_save_vector_fd(struct VFIOPCIDevice *vdev, const char *name,
int nr, int fd);
diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
index e7e6243..7e9aed6 100644
--- a/include/hw/vfio/vfio-device.h
+++ b/include/hw/vfio/vfio-device.h
@@ -18,8 +18,8 @@
* Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
*/
-#ifndef HW_VFIO_VFIO_COMMON_H
-#define HW_VFIO_VFIO_COMMON_H
+#ifndef HW_VFIO_VFIO_DEVICE_H
+#define HW_VFIO_VFIO_DEVICE_H
#include "system/memory.h"
#include "qemu/queue.h"
@@ -27,7 +27,7 @@
#include <linux/vfio.h>
#endif
#include "system/system.h"
-#include "hw/vfio/vfio-container-base.h"
+#include "hw/vfio/vfio-container.h"
#include "hw/vfio/vfio-cpr.h"
#include "system/host_iommu_device.h"
#include "system/iommufd.h"
@@ -54,7 +54,7 @@ typedef struct VFIODevice {
QLIST_ENTRY(VFIODevice) container_next;
QLIST_ENTRY(VFIODevice) global_next;
struct VFIOGroup *group;
- VFIOContainerBase *bcontainer;
+ VFIOContainer *bcontainer;
char *sysfsdev;
char *name;
DeviceState *dev;
@@ -252,7 +252,7 @@ struct VFIODeviceIOOps {
void *data, bool post);
};
-void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer,
+void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainer *bcontainer,
struct vfio_device_info *info);
void vfio_device_unprepare(VFIODevice *vbasedev);
@@ -288,4 +288,4 @@ void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
int vfio_device_get_aw_bits(VFIODevice *vdev);
void vfio_kvm_device_close(void);
-#endif /* HW_VFIO_VFIO_COMMON_H */
+#endif /* HW_VFIO_VFIO_DEVICE_H */
diff --git a/include/semihosting/common-semi.h b/include/semihosting/common-semi.h
index 0a91db7..aa511a4 100644
--- a/include/semihosting/common-semi.h
+++ b/include/semihosting/common-semi.h
@@ -35,5 +35,11 @@
#define COMMON_SEMI_H
void do_common_semihosting(CPUState *cs);
+uint64_t common_semi_arg(CPUState *cs, int argno);
+void common_semi_set_ret(CPUState *cs, uint64_t ret);
+bool is_64bit_semihosting(CPUArchState *env);
+bool common_semi_sys_exit_is_extended(CPUState *cs);
+uint64_t common_semi_stack_bottom(CPUState *cs);
+bool common_semi_has_synccache(CPUArchState *env);
#endif /* COMMON_SEMI_H */
diff --git a/include/semihosting/guestfd.h b/include/semihosting/guestfd.h
index 3d426fe..a7ea104 100644
--- a/include/semihosting/guestfd.h
+++ b/include/semihosting/guestfd.h
@@ -35,13 +35,6 @@ typedef struct GuestFD {
};
} GuestFD;
-/*
- * For ARM semihosting, we have a separate structure for routing
- * data for the console which is outside the guest fd address space.
- */
-extern GuestFD console_in_gf;
-extern GuestFD console_out_gf;
-
/**
* alloc_guestfd:
*
diff --git a/include/semihosting/semihost.h b/include/semihosting/semihost.h
index b03e637..231dc89 100644
--- a/include/semihosting/semihost.h
+++ b/include/semihosting/semihost.h
@@ -33,6 +33,8 @@ typedef enum SemihostingTarget {
* Return true if guest code is allowed to make semihosting calls.
*/
bool semihosting_enabled(bool is_user);
+bool semihosting_arm_compatible(void);
+void semihosting_arm_compatible_init(void);
SemihostingTarget semihosting_get_target(void);
const char *semihosting_get_arg(int i);
diff --git a/include/semihosting/syscalls.h b/include/semihosting/syscalls.h
index 6627c45..03aa45b 100644
--- a/include/semihosting/syscalls.h
+++ b/include/semihosting/syscalls.h
@@ -9,7 +9,7 @@
#ifndef SEMIHOSTING_SYSCALLS_H
#define SEMIHOSTING_SYSCALLS_H
-#include "exec/cpu-defs.h"
+#include "exec/vaddr.h"
#include "gdbstub/syscalls.h"
/*
@@ -24,23 +24,23 @@
typedef struct GuestFD GuestFD;
void semihost_sys_open(CPUState *cs, gdb_syscall_complete_cb complete,
- target_ulong fname, target_ulong fname_len,
+ vaddr fname, uint64_t fname_len,
int gdb_flags, int mode);
void semihost_sys_close(CPUState *cs, gdb_syscall_complete_cb complete,
int fd);
void semihost_sys_read(CPUState *cs, gdb_syscall_complete_cb complete,
- int fd, target_ulong buf, target_ulong len);
+ int fd, vaddr buf, uint64_t len);
void semihost_sys_read_gf(CPUState *cs, gdb_syscall_complete_cb complete,
- GuestFD *gf, target_ulong buf, target_ulong len);
+ GuestFD *gf, vaddr buf, uint64_t len);
void semihost_sys_write(CPUState *cs, gdb_syscall_complete_cb complete,
- int fd, target_ulong buf, target_ulong len);
+ int fd, vaddr buf, uint64_t len);
void semihost_sys_write_gf(CPUState *cs, gdb_syscall_complete_cb complete,
- GuestFD *gf, target_ulong buf, target_ulong len);
+ GuestFD *gf, vaddr buf, uint64_t len);
void semihost_sys_lseek(CPUState *cs, gdb_syscall_complete_cb complete,
int fd, int64_t off, int gdb_whence);
@@ -50,27 +50,27 @@ void semihost_sys_isatty(CPUState *cs, gdb_syscall_complete_cb complete,
void semihost_sys_flen(CPUState *cs, gdb_syscall_complete_cb fstat_cb,
gdb_syscall_complete_cb flen_cb,
- int fd, target_ulong fstat_addr);
+ int fd, vaddr fstat_addr);
void semihost_sys_fstat(CPUState *cs, gdb_syscall_complete_cb complete,
- int fd, target_ulong addr);
+ int fd, vaddr addr);
void semihost_sys_stat(CPUState *cs, gdb_syscall_complete_cb complete,
- target_ulong fname, target_ulong fname_len,
- target_ulong addr);
+ vaddr fname, uint64_t fname_len,
+ vaddr addr);
void semihost_sys_remove(CPUState *cs, gdb_syscall_complete_cb complete,
- target_ulong fname, target_ulong fname_len);
+ vaddr fname, uint64_t fname_len);
void semihost_sys_rename(CPUState *cs, gdb_syscall_complete_cb complete,
- target_ulong oname, target_ulong oname_len,
- target_ulong nname, target_ulong nname_len);
+ vaddr oname, uint64_t oname_len,
+ vaddr nname, uint64_t nname_len);
void semihost_sys_system(CPUState *cs, gdb_syscall_complete_cb complete,
- target_ulong cmd, target_ulong cmd_len);
+ vaddr cmd, uint64_t cmd_len);
void semihost_sys_gettimeofday(CPUState *cs, gdb_syscall_complete_cb complete,
- target_ulong tv_addr, target_ulong tz_addr);
+ vaddr tv_addr, vaddr tz_addr);
void semihost_sys_poll_one(CPUState *cs, gdb_syscall_complete_cb complete,
int fd, GIOCondition cond, int timeout);
diff --git a/linux-user/arm/target_proc.h b/linux-user/arm/target_proc.h
index a4cd694..a28d723 100644
--- a/linux-user/arm/target_proc.h
+++ b/linux-user/arm/target_proc.h
@@ -6,6 +6,8 @@
#ifndef ARM_TARGET_PROC_H
#define ARM_TARGET_PROC_H
+#include "target/arm/cpu-features.h" /* for MIDR_EL1 field definitions */
+
static int open_cpuinfo(CPUArchState *cpu_env, int fd)
{
ARMCPU *cpu = env_archcpu(cpu_env);
diff --git a/linux-user/strace.c b/linux-user/strace.c
index 1233ebc..758c5d3 100644
--- a/linux-user/strace.c
+++ b/linux-user/strace.c
@@ -54,7 +54,7 @@ struct flags {
};
/* No 'struct flags' element should have a zero mask. */
-#define FLAG_BASIC(V, M, N) { V, M | QEMU_BUILD_BUG_ON_ZERO(!(M)), N }
+#define FLAG_BASIC(V, M, N) { V, M | QEMU_BUILD_BUG_ON_ZERO((M) == 0), N }
/* common flags for all architectures */
#define FLAG_GENERIC_MASK(V, M) FLAG_BASIC(V, M, #V)
diff --git a/meson.build b/meson.build
index 72da978..bdfb621 100644
--- a/meson.build
+++ b/meson.build
@@ -1280,6 +1280,7 @@ endif
enable_passt = get_option('passt') \
.require(host_os == 'linux', error_message: 'passt is supported only on Linux') \
+ .require(gio.found(), error_message: 'passt requires gio') \
.allowed()
vde = not_found
diff --git a/rust/Cargo.lock b/rust/Cargo.lock
index eea9286..8315f98 100644
--- a/rust/Cargo.lock
+++ b/rust/Cargo.lock
@@ -15,6 +15,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c84fc003e338a6f69fbd4f7fe9f92b535ff13e9af8997f3b14b6ddff8b1df46d"
[[package]]
+name = "attrs"
+version = "0.2.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2a207d40f43de65285f3de0509bb6cb16bc46098864fce957122bbacce327e5f"
+dependencies = [
+ "proc-macro2",
+ "syn",
+]
+
+[[package]]
name = "bilge"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -188,6 +198,7 @@ dependencies = [
name = "qemu_macros"
version = "0.1.0"
dependencies = [
+ "attrs",
"proc-macro2",
"quote",
"syn",
diff --git a/rust/bql/meson.build b/rust/bql/meson.build
index f369209..7214d94 100644
--- a/rust/bql/meson.build
+++ b/rust/bql/meson.build
@@ -47,6 +47,5 @@ bql_rs = declare_dependency(link_with: [_bql_rs],
# in a separate suite that is run by the "build" CI jobs rather than "check".
rust.doctest('rust-bql-rs-doctests',
_bql_rs,
- protocol: 'rust',
dependencies: bql_rs,
suite: ['doc', 'rust'])
diff --git a/rust/common/meson.build b/rust/common/meson.build
index b805e0f..aff601d 100644
--- a/rust/common/meson.build
+++ b/rust/common/meson.build
@@ -24,11 +24,13 @@ _common_rs = static_library(
common_rs = declare_dependency(link_with: [_common_rs])
+rust.test('rust-common-tests', _common_rs,
+ suite: ['unit', 'rust'])
+
# Doctests are essentially integration tests, so they need the same dependencies.
# Note that running them requires the object files for C code, so place them
# in a separate suite that is run by the "build" CI jobs rather than "check".
rust.doctest('rust-common-doctests',
_common_rs,
- protocol: 'rust',
dependencies: common_rs,
suite: ['doc', 'rust'])
diff --git a/rust/common/src/uninit.rs b/rust/common/src/uninit.rs
index e7f9fcd..8d021b1 100644
--- a/rust/common/src/uninit.rs
+++ b/rust/common/src/uninit.rs
@@ -35,7 +35,7 @@ impl<'a, T, U> MaybeUninitField<'a, T, U> {
}
}
-impl<'a, T, U> Deref for MaybeUninitField<'a, T, U> {
+impl<T, U> Deref for MaybeUninitField<'_, T, U> {
type Target = MaybeUninit<U>;
fn deref(&self) -> &MaybeUninit<U> {
@@ -46,7 +46,7 @@ impl<'a, T, U> Deref for MaybeUninitField<'a, T, U> {
}
}
-impl<'a, T, U> DerefMut for MaybeUninitField<'a, T, U> {
+impl<T, U> DerefMut for MaybeUninitField<'_, T, U> {
fn deref_mut(&mut self) -> &mut MaybeUninit<U> {
// SAFETY: self.child was obtained by dereferencing a valid mutable
// reference; the content of the memory may be invalid or uninitialized
diff --git a/rust/hw/core/src/qdev.rs b/rust/hw/core/src/qdev.rs
index 71b9ef1..a4493db 100644
--- a/rust/hw/core/src/qdev.rs
+++ b/rust/hw/core/src/qdev.rs
@@ -6,7 +6,7 @@
use std::{
ffi::{c_int, c_void, CStr, CString},
- ptr::NonNull,
+ ptr::{addr_of, NonNull},
};
use chardev::Chardev;
@@ -109,9 +109,16 @@ unsafe extern "C" fn rust_resettable_exit_fn<T: ResettablePhasesImpl>(
///
/// # Safety
///
-/// This trait is marked as `unsafe` because currently having a `const` refer to
-/// an `extern static` as a reference instead of a raw pointer results in this
-/// compiler error:
+/// This trait is marked as `unsafe` because `BASE_INFO` and `BIT_INFO` must be
+/// valid raw references to [`bindings::PropertyInfo`].
+///
+/// Note we could not use a regular reference:
+///
+/// ```text
+/// const VALUE: &bindings::PropertyInfo = ...
+/// ```
+///
+/// because this results in the following compiler error:
///
/// ```text
/// constructing invalid value: encountered reference to `extern` static in `const`
@@ -119,28 +126,37 @@ unsafe extern "C" fn rust_resettable_exit_fn<T: ResettablePhasesImpl>(
///
/// This is because the compiler generally might dereference a normal reference
/// during const evaluation, but not in this case (if it did, it'd need to
-/// dereference the raw pointer so this would fail to compile).
+/// dereference the raw pointer so using a `*const` would also fail to compile).
///
/// It is the implementer's responsibility to provide a valid
/// [`bindings::PropertyInfo`] pointer for the trait implementation to be safe.
pub unsafe trait QDevProp {
- const VALUE: *const bindings::PropertyInfo;
-}
-
-/// Use [`bindings::qdev_prop_bool`] for `bool`.
-unsafe impl QDevProp for bool {
- const VALUE: *const bindings::PropertyInfo = unsafe { &bindings::qdev_prop_bool };
+ const BASE_INFO: *const bindings::PropertyInfo;
+ const BIT_INFO: *const bindings::PropertyInfo = {
+ panic!("invalid type for bit property");
+ };
}
-/// Use [`bindings::qdev_prop_uint64`] for `u64`.
-unsafe impl QDevProp for u64 {
- const VALUE: *const bindings::PropertyInfo = unsafe { &bindings::qdev_prop_uint64 };
+macro_rules! impl_qdev_prop {
+ ($type:ty,$info:ident$(, $bit_info:ident)?) => {
+ unsafe impl $crate::qdev::QDevProp for $type {
+ const BASE_INFO: *const $crate::bindings::PropertyInfo =
+ addr_of!($crate::bindings::$info);
+ $(const BIT_INFO: *const $crate::bindings::PropertyInfo =
+ addr_of!($crate::bindings::$bit_info);)?
+ }
+ };
}
-/// Use [`bindings::qdev_prop_chr`] for [`chardev::CharBackend`].
-unsafe impl QDevProp for chardev::CharBackend {
- const VALUE: *const bindings::PropertyInfo = unsafe { &bindings::qdev_prop_chr };
-}
+impl_qdev_prop!(bool, qdev_prop_bool);
+impl_qdev_prop!(u8, qdev_prop_uint8);
+impl_qdev_prop!(u16, qdev_prop_uint16);
+impl_qdev_prop!(u32, qdev_prop_uint32, qdev_prop_bit);
+impl_qdev_prop!(u64, qdev_prop_uint64, qdev_prop_bit64);
+impl_qdev_prop!(usize, qdev_prop_usize);
+impl_qdev_prop!(i32, qdev_prop_int32);
+impl_qdev_prop!(i64, qdev_prop_int64);
+impl_qdev_prop!(chardev::CharBackend, qdev_prop_chr);
/// Trait to define device properties.
///
@@ -232,59 +248,6 @@ impl DeviceClass {
}
}
-#[macro_export]
-macro_rules! define_property {
- ($name:expr, $state:ty, $field:ident, $prop:expr, $type:ty, bit = $bitnr:expr, default = $defval:expr$(,)*) => {
- $crate::bindings::Property {
- // use associated function syntax for type checking
- name: ::std::ffi::CStr::as_ptr($name),
- info: $prop,
- offset: ::std::mem::offset_of!($state, $field) as isize,
- bitnr: $bitnr,
- set_default: true,
- defval: $crate::bindings::Property__bindgen_ty_1 { u: $defval as u64 },
- ..::common::zeroable::Zeroable::ZERO
- }
- };
- ($name:expr, $state:ty, $field:ident, $prop:expr, $type:ty, default = $defval:expr$(,)*) => {
- $crate::bindings::Property {
- // use associated function syntax for type checking
- name: ::std::ffi::CStr::as_ptr($name),
- info: $prop,
- offset: ::std::mem::offset_of!($state, $field) as isize,
- set_default: true,
- defval: $crate::bindings::Property__bindgen_ty_1 { u: $defval as u64 },
- ..::common::zeroable::Zeroable::ZERO
- }
- };
- ($name:expr, $state:ty, $field:ident, $prop:expr, $type:ty$(,)*) => {
- $crate::bindings::Property {
- // use associated function syntax for type checking
- name: ::std::ffi::CStr::as_ptr($name),
- info: $prop,
- offset: ::std::mem::offset_of!($state, $field) as isize,
- set_default: false,
- ..::common::zeroable::Zeroable::ZERO
- }
- };
-}
-
-#[macro_export]
-macro_rules! declare_properties {
- ($ident:ident, $($prop:expr),*$(,)*) => {
- pub static $ident: [$crate::bindings::Property; {
- let mut len = 0;
- $({
- _ = stringify!($prop);
- len += 1;
- })*
- len
- }] = [
- $($prop),*,
- ];
- };
-}
-
unsafe impl ObjectType for DeviceState {
type Class = DeviceClass;
const TYPE_NAME: &'static CStr =
diff --git a/rust/hw/timer/hpet/src/device.rs b/rust/hw/timer/hpet/src/device.rs
index 3cfbe9c..86638c0 100644
--- a/rust/hw/timer/hpet/src/device.rs
+++ b/rust/hw/timer/hpet/src/device.rs
@@ -13,9 +13,8 @@ use std::{
use bql::{BqlCell, BqlRefCell};
use common::{bitops::IntegerExt, uninit_field_mut};
use hwcore::{
- bindings::{qdev_prop_bit, qdev_prop_bool, qdev_prop_uint32, qdev_prop_usize},
- declare_properties, define_property, DeviceImpl, DeviceMethods, DeviceState, InterruptSource,
- Property, ResetType, ResettablePhasesImpl, SysBusDevice, SysBusDeviceImpl, SysBusDeviceMethods,
+ DeviceImpl, DeviceMethods, DeviceState, InterruptSource, ResetType, ResettablePhasesImpl,
+ SysBusDevice, SysBusDeviceImpl, SysBusDeviceMethods,
};
use migration::{
self, impl_vmstate_struct, vmstate_fields, vmstate_of, vmstate_subsections, vmstate_validate,
@@ -520,7 +519,7 @@ impl HPETTimer {
/// HPET Event Timer Block Abstraction
#[repr(C)]
-#[derive(qom::Object)]
+#[derive(qom::Object, hwcore::Device)]
pub struct HPETState {
parent_obj: ParentField<SysBusDevice>,
iomem: MemoryRegion,
@@ -540,10 +539,12 @@ pub struct HPETState {
// Internal state
/// Capabilities that QEMU HPET supports.
/// bit 0: MSI (or FSB) support.
+ #[property(rename = "msi", bit = HPET_FLAG_MSI_SUPPORT_SHIFT as u8, default = false)]
flags: u32,
/// Offset of main counter relative to qemu clock.
hpet_offset: BqlCell<u64>,
+ #[property(rename = "hpet-offset-saved", default = true)]
hpet_offset_saved: bool,
irqs: [InterruptSource; HPET_NUM_IRQ_ROUTES],
@@ -555,11 +556,13 @@ pub struct HPETState {
/// the timers' interrupt can be routed, and is encoded in the
/// bits 32:64 of timer N's config register:
#[doc(alias = "intcap")]
+ #[property(rename = "hpet-intcap", default = 0)]
int_route_cap: u32,
/// HPET timer array managed by this timer block.
#[doc(alias = "timer")]
timers: [BqlRefCell<HPETTimer>; HPET_MAX_TIMERS],
+ #[property(rename = "timers", default = HPET_MIN_TIMERS)]
num_timers: usize,
num_timers_save: BqlCell<u8>,
@@ -901,44 +904,6 @@ impl ObjectImpl for HPETState {
const CLASS_INIT: fn(&mut Self::Class) = Self::Class::class_init::<Self>;
}
-// TODO: Make these properties user-configurable!
-declare_properties! {
- HPET_PROPERTIES,
- define_property!(
- c"timers",
- HPETState,
- num_timers,
- unsafe { &qdev_prop_usize },
- u8,
- default = HPET_MIN_TIMERS
- ),
- define_property!(
- c"msi",
- HPETState,
- flags,
- unsafe { &qdev_prop_bit },
- u32,
- bit = HPET_FLAG_MSI_SUPPORT_SHIFT as u8,
- default = false,
- ),
- define_property!(
- c"hpet-intcap",
- HPETState,
- int_route_cap,
- unsafe { &qdev_prop_uint32 },
- u32,
- default = 0
- ),
- define_property!(
- c"hpet-offset-saved",
- HPETState,
- hpet_offset_saved,
- unsafe { &qdev_prop_bool },
- bool,
- default = true
- ),
-}
-
static VMSTATE_HPET_RTC_IRQ_LEVEL: VMStateDescription<HPETState> =
VMStateDescriptionBuilder::<HPETState>::new()
.name(c"hpet/rtc_irq_level")
@@ -1001,12 +966,6 @@ const VMSTATE_HPET: VMStateDescription<HPETState> =
))
.build();
-// SAFETY: HPET_PROPERTIES is a valid Property array constructed with the
-// hwcore::declare_properties macro.
-unsafe impl hwcore::DevicePropertiesImpl for HPETState {
- const PROPERTIES: &'static [Property] = &HPET_PROPERTIES;
-}
-
impl DeviceImpl for HPETState {
const VMSTATE: Option<VMStateDescription<Self>> = Some(VMSTATE_HPET);
const REALIZE: Option<fn(&Self) -> util::Result<()>> = Some(Self::realize);
diff --git a/rust/meson.build b/rust/meson.build
index c7bd6ab..b3ac3a7 100644
--- a/rust/meson.build
+++ b/rust/meson.build
@@ -13,10 +13,12 @@ libc_rs = dependency('libc-0.2-rs')
subproject('proc-macro2-1-rs', required: true)
subproject('quote-1-rs', required: true)
subproject('syn-2-rs', required: true)
+subproject('attrs-0.2-rs', required: true)
quote_rs_native = dependency('quote-1-rs', native: true)
syn_rs_native = dependency('syn-2-rs', native: true)
proc_macro2_rs_native = dependency('proc-macro2-1-rs', native: true)
+attrs_rs_native = dependency('attrs-0.2-rs', native: true)
genrs = []
diff --git a/rust/migration/meson.build b/rust/migration/meson.build
index 5e820d4..2a49bd1 100644
--- a/rust/migration/meson.build
+++ b/rust/migration/meson.build
@@ -48,6 +48,5 @@ migration_rs = declare_dependency(link_with: [_migration_rs],
# in a separate suite that is run by the "build" CI jobs rather than "check".
rust.doctest('rust-migration-rs-doctests',
_migration_rs,
- protocol: 'rust',
dependencies: migration_rs,
suite: ['doc', 'rust'])
diff --git a/rust/migration/src/vmstate.rs b/rust/migration/src/vmstate.rs
index c05c4a1..e04b19b 100644
--- a/rust/migration/src/vmstate.rs
+++ b/rust/migration/src/vmstate.rs
@@ -144,7 +144,7 @@ macro_rules! vmstate_of {
$crate::bindings::VMStateField {
name: ::core::concat!(::core::stringify!($field_name), "\0")
.as_bytes()
- .as_ptr() as *const ::std::os::raw::c_char,
+ .as_ptr().cast::<::std::os::raw::c_char>(),
offset: ::std::mem::offset_of!($struct_name, $field_name),
$(num_offset: ::std::mem::offset_of!($struct_name, $num),)?
$(field_exists: $crate::vmstate_exist_fn!($struct_name, $test_fn),)?
diff --git a/rust/qemu-macros/Cargo.toml b/rust/qemu-macros/Cargo.toml
index 3b6f1d3..c25b6c0 100644
--- a/rust/qemu-macros/Cargo.toml
+++ b/rust/qemu-macros/Cargo.toml
@@ -16,6 +16,7 @@ rust-version.workspace = true
proc-macro = true
[dependencies]
+attrs = "0.2.9"
proc-macro2 = "1"
quote = "1"
syn = { version = "2", features = ["extra-traits"] }
diff --git a/rust/qemu-macros/meson.build b/rust/qemu-macros/meson.build
index d0b2992..0f27e0d 100644
--- a/rust/qemu-macros/meson.build
+++ b/rust/qemu-macros/meson.build
@@ -8,6 +8,7 @@ _qemu_macros_rs = rust.proc_macro(
'--cfg', 'feature="proc-macro"',
],
dependencies: [
+ attrs_rs_native,
proc_macro2_rs_native,
quote_rs_native,
syn_rs_native,
diff --git a/rust/qemu-macros/src/lib.rs b/rust/qemu-macros/src/lib.rs
index 830b432..3e21b67 100644
--- a/rust/qemu-macros/src/lib.rs
+++ b/rust/qemu-macros/src/lib.rs
@@ -3,10 +3,14 @@
// SPDX-License-Identifier: GPL-2.0-or-later
use proc_macro::TokenStream;
-use quote::{quote, quote_spanned, ToTokens};
+use quote::{quote, quote_spanned};
use syn::{
- parse::Parse, parse_macro_input, parse_quote, punctuated::Punctuated, spanned::Spanned,
- token::Comma, Data, DeriveInput, Error, Field, Fields, FieldsUnnamed, Ident, Meta, Path, Token,
+ parse::{Parse, ParseStream},
+ parse_macro_input, parse_quote,
+ punctuated::Punctuated,
+ spanned::Spanned,
+ token::Comma,
+ Attribute, Data, DeriveInput, Error, Field, Fields, FieldsUnnamed, Ident, Meta, Path, Token,
Variant,
};
mod bits;
@@ -159,61 +163,39 @@ enum DevicePropertyName {
Str(syn::LitStr),
}
-#[derive(Debug)]
+impl Parse for DevicePropertyName {
+ fn parse(input: ParseStream<'_>) -> syn::Result<Self> {
+ let lo = input.lookahead1();
+ if lo.peek(syn::LitStr) {
+ Ok(Self::Str(input.parse()?))
+ } else if lo.peek(syn::LitCStr) {
+ Ok(Self::CStr(input.parse()?))
+ } else {
+ Err(lo.error())
+ }
+ }
+}
+
+#[derive(Default, Debug)]
struct DeviceProperty {
rename: Option<DevicePropertyName>,
+ bitnr: Option<syn::Expr>,
defval: Option<syn::Expr>,
}
-impl Parse for DeviceProperty {
- fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
- let _: syn::Token![#] = input.parse()?;
- let bracketed;
- _ = syn::bracketed!(bracketed in input);
- let attribute = bracketed.parse::<syn::Ident>()?;
- debug_assert_eq!(&attribute.to_string(), "property");
- let mut retval = Self {
- rename: None,
- defval: None,
- };
- let content;
- _ = syn::parenthesized!(content in bracketed);
- while !content.is_empty() {
- let value: syn::Ident = content.parse()?;
- if value == "rename" {
- let _: syn::Token![=] = content.parse()?;
- if retval.rename.is_some() {
- return Err(syn::Error::new(
- value.span(),
- "`rename` can only be used at most once",
- ));
- }
- if content.peek(syn::LitStr) {
- retval.rename = Some(DevicePropertyName::Str(content.parse::<syn::LitStr>()?));
- } else {
- retval.rename =
- Some(DevicePropertyName::CStr(content.parse::<syn::LitCStr>()?));
- }
- } else if value == "default" {
- let _: syn::Token![=] = content.parse()?;
- if retval.defval.is_some() {
- return Err(syn::Error::new(
- value.span(),
- "`default` can only be used at most once",
- ));
- }
- retval.defval = Some(content.parse()?);
- } else {
- return Err(syn::Error::new(
- value.span(),
- format!("unrecognized field `{value}`"),
- ));
- }
+impl DeviceProperty {
+ fn parse_from(&mut self, a: &Attribute) -> syn::Result<()> {
+ use attrs::{set, with, Attrs};
+ let mut parser = Attrs::new();
+ parser.once("rename", with::eq(set::parse(&mut self.rename)));
+ parser.once("bit", with::eq(set::parse(&mut self.bitnr)));
+ parser.once("default", with::eq(set::parse(&mut self.defval)));
+ a.parse_args_with(&mut parser)
+ }
- if !content.is_empty() {
- let _: syn::Token![,] = content.parse()?;
- }
- }
+ fn parse(a: &Attribute) -> syn::Result<Self> {
+ let mut retval = Self::default();
+ retval.parse_from(a)?;
Ok(retval)
}
}
@@ -235,14 +217,18 @@ fn derive_device_or_error(input: DeriveInput) -> Result<proc_macro2::TokenStream
f.attrs
.iter()
.filter(|a| a.path().is_ident("property"))
- .map(|a| Ok((f.clone(), syn::parse2(a.to_token_stream())?)))
+ .map(|a| Ok((f.clone(), DeviceProperty::parse(a)?)))
})
.collect::<Result<Vec<_>, Error>>()?;
let name = &input.ident;
let mut properties_expanded = vec![];
for (field, prop) in properties {
- let DeviceProperty { rename, defval } = prop;
+ let DeviceProperty {
+ rename,
+ bitnr,
+ defval,
+ } = prop;
let field_name = field.ident.unwrap();
macro_rules! str_to_c_str {
($value:expr, $span:expr) => {{
@@ -262,8 +248,8 @@ fn derive_device_or_error(input: DeriveInput) -> Result<proc_macro2::TokenStream
let prop_name = rename.map_or_else(
|| str_to_c_str!(field_name.to_string(), field_name.span()),
- |rename| -> Result<proc_macro2::TokenStream, Error> {
- match rename {
+ |prop_rename| -> Result<proc_macro2::TokenStream, Error> {
+ match prop_rename {
DevicePropertyName::CStr(cstr_lit) => Ok(quote! { #cstr_lit }),
DevicePropertyName::Str(str_lit) => {
str_to_c_str!(str_lit.value(), str_lit.span())
@@ -272,14 +258,20 @@ fn derive_device_or_error(input: DeriveInput) -> Result<proc_macro2::TokenStream
},
)?;
let field_ty = field.ty.clone();
- let qdev_prop = quote! { <#field_ty as ::hwcore::QDevProp>::VALUE };
+ let qdev_prop = if bitnr.is_none() {
+ quote! { <#field_ty as ::hwcore::QDevProp>::BASE_INFO }
+ } else {
+ quote! { <#field_ty as ::hwcore::QDevProp>::BIT_INFO }
+ };
+ let bitnr = bitnr.unwrap_or(syn::Expr::Verbatim(quote! { 0 }));
let set_default = defval.is_some();
let defval = defval.unwrap_or(syn::Expr::Verbatim(quote! { 0 }));
properties_expanded.push(quote! {
::hwcore::bindings::Property {
name: ::std::ffi::CStr::as_ptr(#prop_name),
- info: #qdev_prop ,
+ info: #qdev_prop,
offset: ::core::mem::offset_of!(#name, #field_name) as isize,
+ bitnr: #bitnr,
set_default: #set_default,
defval: ::hwcore::bindings::Property__bindgen_ty_1 { u: #defval as u64 },
..::common::Zeroable::ZERO
diff --git a/rust/qemu-macros/src/tests.rs b/rust/qemu-macros/src/tests.rs
index 9ab7eab..ac998d2 100644
--- a/rust/qemu-macros/src/tests.rs
+++ b/rust/qemu-macros/src/tests.rs
@@ -60,7 +60,7 @@ fn test_derive_device() {
migrate_clock: bool,
}
},
- "unrecognized field `defalt`"
+ "Expected one of `bit`, `default` or `rename`"
);
// Check that repeated attributes are not allowed:
derive_compile_fail!(
@@ -73,7 +73,8 @@ fn test_derive_device() {
migrate_clock: bool,
}
},
- "`rename` can only be used at most once"
+ "Duplicate argument",
+ "Already used here",
);
derive_compile_fail!(
derive_device_or_error,
@@ -85,7 +86,21 @@ fn test_derive_device() {
migrate_clock: bool,
}
},
- "`default` can only be used at most once"
+ "Duplicate argument",
+ "Already used here",
+ );
+ derive_compile_fail!(
+ derive_device_or_error,
+ quote! {
+ #[repr(C)]
+ #[derive(Device)]
+ struct DummyState {
+ #[property(bit = 0, bit = 1)]
+ flags: u32,
+ }
+ },
+ "Duplicate argument",
+ "Already used here",
);
// Check that the field name is preserved when `rename` isn't used:
derive_compile!(
@@ -104,8 +119,9 @@ fn test_derive_device() {
const PROPERTIES: &'static [::hwcore::bindings::Property] = &[
::hwcore::bindings::Property {
name: ::std::ffi::CStr::as_ptr(c"migrate_clock"),
- info: <bool as ::hwcore::QDevProp>::VALUE,
+ info: <bool as ::hwcore::QDevProp>::BASE_INFO,
offset: ::core::mem::offset_of!(DummyState, migrate_clock) as isize,
+ bitnr: 0,
set_default: true,
defval: ::hwcore::bindings::Property__bindgen_ty_1 { u: true as u64 },
..::common::Zeroable::ZERO
@@ -131,8 +147,9 @@ fn test_derive_device() {
const PROPERTIES: &'static [::hwcore::bindings::Property] = &[
::hwcore::bindings::Property {
name: ::std::ffi::CStr::as_ptr(c"migrate-clk"),
- info: <bool as ::hwcore::QDevProp>::VALUE,
+ info: <bool as ::hwcore::QDevProp>::BASE_INFO,
offset: ::core::mem::offset_of!(DummyState, migrate_clock) as isize,
+ bitnr: 0,
set_default: true,
defval: ::hwcore::bindings::Property__bindgen_ty_1 { u: true as u64 },
..::common::Zeroable::ZERO
@@ -141,6 +158,92 @@ fn test_derive_device() {
}
}
);
+    // Check that `bit` value is used for the bit property without a default
+    // value (note: although the C macro (e.g., DEFINE_PROP_BIT) always requires
+    // a default value, the Rust side allows this field to default to "0"):
+ derive_compile!(
+ derive_device_or_error,
+ quote! {
+ #[repr(C)]
+ #[derive(Device)]
+ pub struct DummyState {
+ parent: ParentField<DeviceState>,
+ #[property(bit = 3)]
+ flags: u32,
+ }
+ },
+ quote! {
+ unsafe impl ::hwcore::DevicePropertiesImpl for DummyState {
+ const PROPERTIES: &'static [::hwcore::bindings::Property] = &[
+ ::hwcore::bindings::Property {
+ name: ::std::ffi::CStr::as_ptr(c"flags"),
+ info: <u32 as ::hwcore::QDevProp>::BIT_INFO,
+ offset: ::core::mem::offset_of!(DummyState, flags) as isize,
+ bitnr: 3,
+ set_default: false,
+ defval: ::hwcore::bindings::Property__bindgen_ty_1 { u: 0 as u64 },
+ ..::common::Zeroable::ZERO
+ }
+ ];
+ }
+ }
+ );
+ // Check that `bit` value is used for the bit property when used:
+ derive_compile!(
+ derive_device_or_error,
+ quote! {
+ #[repr(C)]
+ #[derive(Device)]
+ pub struct DummyState {
+ parent: ParentField<DeviceState>,
+ #[property(bit = 3, default = true)]
+ flags: u32,
+ }
+ },
+ quote! {
+ unsafe impl ::hwcore::DevicePropertiesImpl for DummyState {
+ const PROPERTIES: &'static [::hwcore::bindings::Property] = &[
+ ::hwcore::bindings::Property {
+ name: ::std::ffi::CStr::as_ptr(c"flags"),
+ info: <u32 as ::hwcore::QDevProp>::BIT_INFO,
+ offset: ::core::mem::offset_of!(DummyState, flags) as isize,
+ bitnr: 3,
+ set_default: true,
+ defval: ::hwcore::bindings::Property__bindgen_ty_1 { u: true as u64 },
+ ..::common::Zeroable::ZERO
+ }
+ ];
+ }
+ }
+ );
+ // Check that `bit` value is used for the bit property with rename when used:
+ derive_compile!(
+ derive_device_or_error,
+ quote! {
+ #[repr(C)]
+ #[derive(Device)]
+ pub struct DummyState {
+ parent: ParentField<DeviceState>,
+ #[property(rename = "msi", bit = 3, default = false)]
+ flags: u64,
+ }
+ },
+ quote! {
+ unsafe impl ::hwcore::DevicePropertiesImpl for DummyState {
+ const PROPERTIES: &'static [::hwcore::bindings::Property] = &[
+ ::hwcore::bindings::Property {
+ name: ::std::ffi::CStr::as_ptr(c"msi"),
+ info: <u64 as ::hwcore::QDevProp>::BIT_INFO,
+ offset: ::core::mem::offset_of!(DummyState, flags) as isize,
+ bitnr: 3,
+ set_default: true,
+ defval: ::hwcore::bindings::Property__bindgen_ty_1 { u: false as u64 },
+ ..::common::Zeroable::ZERO
+ }
+ ];
+ }
+ }
+ );
}
#[test]
diff --git a/rust/qom/meson.build b/rust/qom/meson.build
index 40c51b7..21e1214 100644
--- a/rust/qom/meson.build
+++ b/rust/qom/meson.build
@@ -38,6 +38,5 @@ qom_rs = declare_dependency(link_with: [_qom_rs], dependencies: [qemu_macros, qo
# in a separate suite that is run by the "build" CI jobs rather than "check".
rust.doctest('rust-qom-rs-doctests',
_qom_rs,
- protocol: 'rust',
dependencies: qom_rs,
suite: ['doc', 'rust'])
diff --git a/rust/util/meson.build b/rust/util/meson.build
index 87a8936..7ca6993 100644
--- a/rust/util/meson.build
+++ b/rust/util/meson.build
@@ -44,12 +44,15 @@ _util_rs = static_library(
util_rs = declare_dependency(link_with: [_util_rs], dependencies: [qemuutil, qom])
+rust.test('rust-util-tests', _util_rs,
+ dependencies: [qemuutil, qom],
+ suite: ['unit', 'rust'])
+
# Doctests are essentially integration tests, so they need the same dependencies.
# Note that running them requires the object files for C code, so place them
# in a separate suite that is run by the "build" CI jobs rather than "check".
rust.doctest('rust-util-rs-doctests',
_util_rs,
- protocol: 'rust',
dependencies: util_rs,
suite: ['doc', 'rust']
)
diff --git a/scripts/archive-source.sh b/scripts/archive-source.sh
index 035828c..476a996 100755
--- a/scripts/archive-source.sh
+++ b/scripts/archive-source.sh
@@ -27,7 +27,7 @@ sub_file="${sub_tdir}/submodule.tar"
# in their checkout, because the build environment is completely
# different to the host OS.
subprojects="keycodemapdb libvfio-user berkeley-softfloat-3
- berkeley-testfloat-3 anyhow-1-rs arbitrary-int-1-rs bilge-0.2-rs
+ berkeley-testfloat-3 anyhow-1-rs arbitrary-int-1-rs attrs-0.2-rs bilge-0.2-rs
bilge-impl-0.2-rs either-1-rs foreign-0.3-rs itertools-0.11-rs
libc-0.2-rs proc-macro2-1-rs
proc-macro-error-1-rs proc-macro-error-attr-1-rs quote-1-rs
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 91616c9..40b6955 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1816,7 +1816,8 @@ sub process {
}
# Check SPDX-License-Identifier references a permitted license
- if ($rawline =~ m,SPDX-License-Identifier: (.*?)(\*/)?\s*$,) {
+ if (($rawline =~ m,SPDX-License-Identifier: (.*?)(\*/)?\s*$,) &&
+ $rawline !~ /^-/) {
$fileinfo->{facts}->{sawspdx} = 1;
&checkspdx($realfile, $1);
}
diff --git a/scripts/ci/gitlab-failure-analysis b/scripts/ci/gitlab-failure-analysis
new file mode 100755
index 0000000..906725b
--- /dev/null
+++ b/scripts/ci/gitlab-failure-analysis
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+#
+# A script to analyse failures in the gitlab pipelines. It requires an
+# API key from gitlab with the following permissions:
+# - api
+# - read_repository
+# - read_user
+#
+
+import argparse
+import gitlab
+import os
+
+#
+# Arguments
+#
+class NoneForEmptyStringAction(argparse.Action):
+ def __call__(self, parser, namespace, value, option_string=None):
+ if value == '':
+ setattr(namespace, self.dest, None)
+ else:
+ setattr(namespace, self.dest, value)
+
+
+parser = argparse.ArgumentParser(description="Analyse failed GitLab CI runs.")
+
+parser.add_argument("--gitlab",
+ default="https://gitlab.com",
+ help="GitLab instance URL (default: https://gitlab.com).")
+parser.add_argument("--id", default=11167699,
+ type=int,
+ help="GitLab project id (default: 11167699 for qemu-project/qemu)")
+parser.add_argument("--token",
+ default=os.getenv("GITLAB_TOKEN"),
+ help="Your personal access token with 'api' scope.")
+parser.add_argument("--branch",
+ type=str,
+ default="staging",
+ action=NoneForEmptyStringAction,
+ help="The name of the branch (default: 'staging')")
+parser.add_argument("--status",
+ type=str,
+ action=NoneForEmptyStringAction,
+ default="failed",
+ help="Filter by branch status (default: 'failed')")
+parser.add_argument("--count", type=int,
+ default=3,
+ help="The number of failed runs to fetch.")
+parser.add_argument("--skip-jobs",
+ default=False,
+ action='store_true',
+ help="Skip dumping the job info")
+parser.add_argument("--pipeline", type=int,
+ nargs="+",
+ default=None,
+ help="Explicit pipeline ID(s) to fetch.")
+
+
+if __name__ == "__main__":
+ args = parser.parse_args()
+
+ gl = gitlab.Gitlab(url=args.gitlab, private_token=args.token)
+ project = gl.projects.get(args.id)
+
+
+ pipelines_to_process = []
+
+ # Use explicit pipeline IDs if provided, otherwise fetch a list
+ if args.pipeline:
+ args.count = len(args.pipeline)
+ for p_id in args.pipeline:
+ pipelines_to_process.append(project.pipelines.get(p_id))
+ else:
+ # Use an iterator to fetch the pipelines
+ pipe_iter = project.pipelines.list(iterator=True,
+ status=args.status,
+ ref=args.branch)
+ # Check each failed pipeline
+ pipelines_to_process = [next(pipe_iter) for _ in range(args.count)]
+
+ # Check each pipeline
+ for p in pipelines_to_process:
+
+ jobs = p.jobs.list(get_all=True)
+ failed_jobs = [j for j in jobs if j.status == "failed"]
+ skipped_jobs = [j for j in jobs if j.status == "skipped"]
+ manual_jobs = [j for j in jobs if j.status == "manual"]
+
+ trs = p.test_report_summary.get()
+ total = trs.total["count"]
+ skipped = trs.total["skipped"]
+ failed = trs.total["failed"]
+
+ print(f"{p.status} pipeline {p.id}, total jobs {len(jobs)}, "
+ f"skipped {len(skipped_jobs)}, "
+          f"failed {len(failed_jobs)}, "
+ f"{total} tests, "
+ f"{skipped} skipped tests, "
+ f"{failed} failed tests")
+
+ if not args.skip_jobs:
+ for j in failed_jobs:
+ print(f" Failed job {j.id}, {j.name}, {j.web_url}")
+
+ # It seems we can only extract failing tests from the full
+ # test report, maybe there is some way to filter it.
+
+ if failed > 0:
+ ftr = p.test_report.get()
+ failed_suites = [s for s in ftr.test_suites if
+ s["failed_count"] > 0]
+ for fs in failed_suites:
+ name = fs["name"]
+ tests = fs["test_cases"]
+ failed_tests = [t for t in tests if t["status"] == 'failed']
+ for t in failed_tests:
+                print(f"  Failed test {t['classname']}, {name}, {t['name']}")
diff --git a/scripts/make-release b/scripts/make-release
index 87f563e..bc1b43c 100755
--- a/scripts/make-release
+++ b/scripts/make-release
@@ -40,7 +40,7 @@ fi
# Only include wraps that are invoked with subproject()
SUBPROJECTS="libvfio-user keycodemapdb berkeley-softfloat-3
- berkeley-testfloat-3 anyhow-1-rs arbitrary-int-1-rs bilge-0.2-rs
+ berkeley-testfloat-3 anyhow-1-rs arbitrary-int-1-rs attrs-0.2-rs bilge-0.2-rs
bilge-impl-0.2-rs either-1-rs foreign-0.3-rs itertools-0.11-rs
libc-0.2-rs proc-macro2-1-rs
proc-macro-error-1-rs proc-macro-error-attr-1-rs quote-1-rs
diff --git a/semihosting/arm-compat-semi-stub.c b/semihosting/arm-compat-semi-stub.c
new file mode 100644
index 0000000..bfa3681
--- /dev/null
+++ b/semihosting/arm-compat-semi-stub.c
@@ -0,0 +1,19 @@
+/*
+ * Stubs for platforms other than ARM
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "semihosting/semihost.h"
+#include <glib.h>
+
+bool semihosting_arm_compatible(void)
+{
+ return false;
+}
+
+void semihosting_arm_compatible_init(void)
+{
+ g_assert_not_reached();
+}
diff --git a/semihosting/arm-compat-semi.c b/semihosting/arm-compat-semi.c
index bcd13cd..6100126 100644
--- a/semihosting/arm-compat-semi.c
+++ b/semihosting/arm-compat-semi.c
@@ -100,6 +100,13 @@ static int gdb_open_modeflags[12] = {
GDB_O_RDWR | GDB_O_CREAT | GDB_O_APPEND,
};
+/*
+ * For ARM semihosting, we have a separate structure for routing
+ * data for the console which is outside the guest fd address space.
+ */
+static GuestFD console_in_gf;
+static GuestFD console_out_gf;
+
#ifndef CONFIG_USER_ONLY
/**
@@ -115,7 +122,7 @@ static int gdb_open_modeflags[12] = {
*/
typedef struct LayoutInfo {
- target_ulong rambase;
+ vaddr rambase;
size_t ramsize;
hwaddr heapbase;
hwaddr heaplimit;
@@ -166,8 +173,7 @@ static LayoutInfo common_semi_find_bases(CPUState *cs)
#endif
-#include "cpu.h"
-#include "common-semi-target.h"
+#include "semihosting/common-semi.h"
/*
* Read the input value from the argument block; fail the semihosting
@@ -207,7 +213,7 @@ static LayoutInfo common_semi_find_bases(CPUState *cs)
* global, and we assume that the guest takes care of avoiding any races.
*/
#ifndef CONFIG_USER_ONLY
-static target_ulong syscall_err;
+static uint64_t syscall_err;
#include "semihosting/uaccess.h"
#endif
@@ -253,8 +259,8 @@ static void common_semi_rw_cb(CPUState *cs, uint64_t ret, int err)
{
/* Recover the original length from the third argument. */
CPUArchState *env G_GNUC_UNUSED = cpu_env(cs);
- target_ulong args = common_semi_arg(cs, 1);
- target_ulong arg2;
+ uint64_t args = common_semi_arg(cs, 1);
+ uint64_t arg2;
GET_ARG(2);
if (err) {
@@ -293,9 +299,9 @@ static void common_semi_seek_cb(CPUState *cs, uint64_t ret, int err)
* is defined by GDB's remote protocol and is not target-specific.)
* We put this on the guest's stack just below SP.
*/
-static target_ulong common_semi_flen_buf(CPUState *cs)
+static uint64_t common_semi_flen_buf(CPUState *cs)
{
- target_ulong sp = common_semi_stack_bottom(cs);
+ vaddr sp = common_semi_stack_bottom(cs);
return sp - 64;
}
@@ -352,6 +358,25 @@ static const uint8_t featurefile_data[] = {
SH_EXT_EXIT_EXTENDED | SH_EXT_STDOUT_STDERR, /* Feature byte 0 */
};
+bool semihosting_arm_compatible(void)
+{
+ return true;
+}
+
+void semihosting_arm_compatible_init(void)
+{
+ /* For ARM-compat, the console is in a separate namespace. */
+ if (use_gdb_syscalls()) {
+ console_in_gf.type = GuestFDGDB;
+ console_in_gf.hostfd = 0;
+ console_out_gf.type = GuestFDGDB;
+ console_out_gf.hostfd = 2;
+ } else {
+ console_in_gf.type = GuestFDConsole;
+ console_out_gf.type = GuestFDConsole;
+ }
+}
+
/*
* Do a semihosting call.
*
@@ -363,9 +388,9 @@ static const uint8_t featurefile_data[] = {
void do_common_semihosting(CPUState *cs)
{
CPUArchState *env = cpu_env(cs);
- target_ulong args;
- target_ulong arg0, arg1, arg2, arg3;
- target_ulong ul_ret;
+ uint64_t args;
+ uint64_t arg0, arg1, arg2, arg3;
+ uint64_t ul_ret;
char * s;
int nr;
int64_t elapsed;
@@ -436,7 +461,7 @@ void do_common_semihosting(CPUState *cs)
case TARGET_SYS_WRITEC:
/*
- * FIXME: the byte to be written is in a target_ulong slot,
+ * FIXME: the byte to be written is in a uint64_t slot,
* which means this is wrong for a big-endian guest.
*/
semihost_sys_write_gf(cs, common_semi_dead_cb,
@@ -475,10 +500,13 @@ void do_common_semihosting(CPUState *cs)
break;
case TARGET_SYS_ISERROR:
+ {
GET_ARG(0);
- common_semi_set_ret(cs, (target_long)arg0 < 0);
+ bool ret = is_64bit_semihosting(env) ?
+ (int64_t)arg0 < 0 : (int32_t)arg0 < 0;
+ common_semi_set_ret(cs, ret);
break;
-
+ }
case TARGET_SYS_ISTTY:
GET_ARG(0);
semihost_sys_isatty(cs, common_semi_istty_cb, arg0);
@@ -662,7 +690,7 @@ void do_common_semihosting(CPUState *cs)
case TARGET_SYS_HEAPINFO:
{
- target_ulong retvals[4];
+ uint64_t retvals[4];
int i;
#ifdef CONFIG_USER_ONLY
TaskState *ts = get_task_state(cs);
@@ -728,7 +756,8 @@ void do_common_semihosting(CPUState *cs)
{
uint32_t ret;
- if (common_semi_sys_exit_extended(cs, nr)) {
+ if (nr == TARGET_SYS_EXIT_EXTENDED ||
+ common_semi_sys_exit_is_extended(cs)) {
/*
* The A64 version of SYS_EXIT takes a parameter block,
* so the application-exit type can return a subcode which
@@ -759,7 +788,7 @@ void do_common_semihosting(CPUState *cs)
case TARGET_SYS_ELAPSED:
elapsed = get_clock() - clock_start;
- if (sizeof(target_ulong) == 8) {
+ if (is_64bit_semihosting(env)) {
if (SET_ARG(0, elapsed)) {
goto do_fault;
}
diff --git a/semihosting/guestfd.c b/semihosting/guestfd.c
index d324143..e8f236c 100644
--- a/semihosting/guestfd.c
+++ b/semihosting/guestfd.c
@@ -12,35 +12,20 @@
#include "gdbstub/syscalls.h"
#include "semihosting/semihost.h"
#include "semihosting/guestfd.h"
-#ifndef CONFIG_USER_ONLY
-#include CONFIG_DEVICES
-#endif
static GArray *guestfd_array;
-#ifdef CONFIG_ARM_COMPATIBLE_SEMIHOSTING
-GuestFD console_in_gf;
-GuestFD console_out_gf;
-#endif
-
void qemu_semihosting_guestfd_init(void)
{
/* New entries zero-initialized, i.e. type GuestFDUnused */
guestfd_array = g_array_new(FALSE, TRUE, sizeof(GuestFD));
-#ifdef CONFIG_ARM_COMPATIBLE_SEMIHOSTING
- /* For ARM-compat, the console is in a separate namespace. */
- if (use_gdb_syscalls()) {
- console_in_gf.type = GuestFDGDB;
- console_in_gf.hostfd = 0;
- console_out_gf.type = GuestFDGDB;
- console_out_gf.hostfd = 2;
- } else {
- console_in_gf.type = GuestFDConsole;
- console_out_gf.type = GuestFDConsole;
+ if (semihosting_arm_compatible()) {
+ semihosting_arm_compatible_init();
+ return;
}
-#else
- /* Otherwise, the stdio file descriptors apply. */
+
+    /* Outside of ARM-compat, the stdio file descriptors apply. */
guestfd_array = g_array_set_size(guestfd_array, 3);
#ifndef CONFIG_USER_ONLY
if (!use_gdb_syscalls()) {
@@ -54,7 +39,6 @@ void qemu_semihosting_guestfd_init(void)
associate_guestfd(0, 0);
associate_guestfd(1, 1);
associate_guestfd(2, 2);
-#endif
}
/*
diff --git a/semihosting/meson.build b/semihosting/meson.build
index b1ab250..99f10e2 100644
--- a/semihosting/meson.build
+++ b/semihosting/meson.build
@@ -1,17 +1,21 @@
-specific_ss.add(when: 'CONFIG_SEMIHOSTING', if_true: files(
- 'guestfd.c',
- 'syscalls.c',
-))
-
common_ss.add(when: 'CONFIG_SEMIHOSTING', if_false: files('stubs-all.c'))
-user_ss.add(when: 'CONFIG_SEMIHOSTING', if_true: files('user.c'))
+user_ss.add(when: 'CONFIG_SEMIHOSTING', if_true: files(
+ 'user.c',
+ 'guestfd.c'))
system_ss.add(when: 'CONFIG_SEMIHOSTING', if_true: files(
'config.c',
'console.c',
+ 'guestfd.c',
'uaccess.c',
+ 'syscalls.c',
), if_false: files(
'stubs-system.c',
))
+system_ss.add(when: 'CONFIG_ARM_COMPATIBLE_SEMIHOSTING',
+ if_true: files('arm-compat-semi.c'),
+ if_false: files('arm-compat-semi-stub.c'))
-specific_ss.add(when: ['CONFIG_ARM_COMPATIBLE_SEMIHOSTING'],
+specific_ss.add(when: ['CONFIG_SEMIHOSTING', 'CONFIG_USER_ONLY'],
+ if_true: files('syscalls.c'))
+specific_ss.add(when: ['CONFIG_ARM_COMPATIBLE_SEMIHOSTING', 'CONFIG_USER_ONLY'],
if_true: files('arm-compat-semi.c'))
diff --git a/semihosting/syscalls.c b/semihosting/syscalls.c
index f6451d9..20f155f 100644
--- a/semihosting/syscalls.c
+++ b/semihosting/syscalls.c
@@ -8,7 +8,6 @@
#include "qemu/osdep.h"
#include "qemu/log.h"
-#include "cpu.h"
#include "gdbstub/syscalls.h"
#include "semihosting/guestfd.h"
#include "semihosting/syscalls.h"
@@ -23,7 +22,7 @@
/*
* Validate or compute the length of the string (including terminator).
*/
-static int validate_strlen(CPUState *cs, target_ulong str, target_ulong tlen)
+static int validate_strlen(CPUState *cs, vaddr str, uint64_t tlen)
{
CPUArchState *env G_GNUC_UNUSED = cpu_env(cs);
char c;
@@ -52,7 +51,7 @@ static int validate_strlen(CPUState *cs, target_ulong str, target_ulong tlen)
}
static int validate_lock_user_string(char **pstr, CPUState *cs,
- target_ulong tstr, target_ulong tlen)
+ vaddr tstr, uint64_t tlen)
{
int ret = validate_strlen(cs, tstr, tlen);
CPUArchState *env G_GNUC_UNUSED = cpu_env(cs);
@@ -72,7 +71,7 @@ static int validate_lock_user_string(char **pstr, CPUState *cs,
* big-endian. Until we do something with gdb, also produce the
* same big-endian result from the host.
*/
-static int copy_stat_to_user(CPUState *cs, target_ulong addr,
+static int copy_stat_to_user(CPUState *cs, vaddr addr,
const struct stat *s)
{
CPUArchState *env G_GNUC_UNUSED = cpu_env(cs);
@@ -129,7 +128,7 @@ static void gdb_open_cb(CPUState *cs, uint64_t ret, int err)
}
static void gdb_open(CPUState *cs, gdb_syscall_complete_cb complete,
- target_ulong fname, target_ulong fname_len,
+ vaddr fname, uint64_t fname_len,
int gdb_flags, int mode)
{
int len = validate_strlen(cs, fname, fname_len);
@@ -140,7 +139,7 @@ static void gdb_open(CPUState *cs, gdb_syscall_complete_cb complete,
gdb_open_complete = complete;
gdb_do_syscall(gdb_open_cb, "open,%s,%x,%x",
- (uint64_t)fname, (uint32_t)len,
+ (vaddr)fname, (uint32_t)len,
(uint32_t)gdb_flags, (uint32_t)mode);
}
@@ -151,17 +150,17 @@ static void gdb_close(CPUState *cs, gdb_syscall_complete_cb complete,
}
static void gdb_read(CPUState *cs, gdb_syscall_complete_cb complete,
- GuestFD *gf, target_ulong buf, target_ulong len)
+ GuestFD *gf, vaddr buf, uint64_t len)
{
gdb_do_syscall(complete, "read,%x,%lx,%lx",
- (uint32_t)gf->hostfd, (uint64_t)buf, (uint64_t)len);
+ (uint32_t)gf->hostfd, (vaddr)buf, (uint64_t)len);
}
static void gdb_write(CPUState *cs, gdb_syscall_complete_cb complete,
- GuestFD *gf, target_ulong buf, target_ulong len)
+ GuestFD *gf, vaddr buf, uint64_t len)
{
gdb_do_syscall(complete, "write,%x,%lx,%lx",
- (uint32_t)gf->hostfd, (uint64_t)buf, (uint64_t)len);
+ (uint32_t)gf->hostfd, (vaddr)buf, (uint64_t)len);
}
static void gdb_lseek(CPUState *cs, gdb_syscall_complete_cb complete,
@@ -178,15 +177,15 @@ static void gdb_isatty(CPUState *cs, gdb_syscall_complete_cb complete,
}
static void gdb_fstat(CPUState *cs, gdb_syscall_complete_cb complete,
- GuestFD *gf, target_ulong addr)
+ GuestFD *gf, vaddr addr)
{
gdb_do_syscall(complete, "fstat,%x,%lx",
- (uint32_t)gf->hostfd, (uint64_t)addr);
+ (uint32_t)gf->hostfd, (vaddr)addr);
}
static void gdb_stat(CPUState *cs, gdb_syscall_complete_cb complete,
- target_ulong fname, target_ulong fname_len,
- target_ulong addr)
+ vaddr fname, uint64_t fname_len,
+ vaddr addr)
{
int len = validate_strlen(cs, fname, fname_len);
if (len < 0) {
@@ -195,11 +194,11 @@ static void gdb_stat(CPUState *cs, gdb_syscall_complete_cb complete,
}
gdb_do_syscall(complete, "stat,%s,%lx",
- (uint64_t)fname, (uint32_t)len, (uint64_t)addr);
+ (vaddr)fname, (uint32_t)len, (vaddr)addr);
}
static void gdb_remove(CPUState *cs, gdb_syscall_complete_cb complete,
- target_ulong fname, target_ulong fname_len)
+ vaddr fname, uint64_t fname_len)
{
int len = validate_strlen(cs, fname, fname_len);
if (len < 0) {
@@ -207,12 +206,12 @@ static void gdb_remove(CPUState *cs, gdb_syscall_complete_cb complete,
return;
}
- gdb_do_syscall(complete, "unlink,%s", (uint64_t)fname, (uint32_t)len);
+ gdb_do_syscall(complete, "unlink,%s", (vaddr)fname, (uint32_t)len);
}
static void gdb_rename(CPUState *cs, gdb_syscall_complete_cb complete,
- target_ulong oname, target_ulong oname_len,
- target_ulong nname, target_ulong nname_len)
+ vaddr oname, uint64_t oname_len,
+ vaddr nname, uint64_t nname_len)
{
int olen, nlen;
@@ -228,12 +227,12 @@ static void gdb_rename(CPUState *cs, gdb_syscall_complete_cb complete,
}
gdb_do_syscall(complete, "rename,%s,%s",
- (uint64_t)oname, (uint32_t)olen,
- (uint64_t)nname, (uint32_t)nlen);
+ (vaddr)oname, (uint32_t)olen,
+ (vaddr)nname, (uint32_t)nlen);
}
static void gdb_system(CPUState *cs, gdb_syscall_complete_cb complete,
- target_ulong cmd, target_ulong cmd_len)
+ vaddr cmd, uint64_t cmd_len)
{
int len = validate_strlen(cs, cmd, cmd_len);
if (len < 0) {
@@ -241,14 +240,14 @@ static void gdb_system(CPUState *cs, gdb_syscall_complete_cb complete,
return;
}
- gdb_do_syscall(complete, "system,%s", (uint64_t)cmd, (uint32_t)len);
+ gdb_do_syscall(complete, "system,%s", (vaddr)cmd, (uint32_t)len);
}
static void gdb_gettimeofday(CPUState *cs, gdb_syscall_complete_cb complete,
- target_ulong tv_addr, target_ulong tz_addr)
+ vaddr tv_addr, vaddr tz_addr)
{
gdb_do_syscall(complete, "gettimeofday,%lx,%lx",
- (uint64_t)tv_addr, (uint64_t)tz_addr);
+ (vaddr)tv_addr, (vaddr)tz_addr);
}
/*
@@ -256,7 +255,7 @@ static void gdb_gettimeofday(CPUState *cs, gdb_syscall_complete_cb complete,
*/
static void host_open(CPUState *cs, gdb_syscall_complete_cb complete,
- target_ulong fname, target_ulong fname_len,
+ vaddr fname, uint64_t fname_len,
int gdb_flags, int mode)
{
CPUArchState *env G_GNUC_UNUSED = cpu_env(cs);
@@ -316,7 +315,7 @@ static void host_close(CPUState *cs, gdb_syscall_complete_cb complete,
}
static void host_read(CPUState *cs, gdb_syscall_complete_cb complete,
- GuestFD *gf, target_ulong buf, target_ulong len)
+ GuestFD *gf, vaddr buf, uint64_t len)
{
CPUArchState *env G_GNUC_UNUSED = cpu_env(cs);
void *ptr = lock_user(VERIFY_WRITE, buf, len, 0);
@@ -337,7 +336,7 @@ static void host_read(CPUState *cs, gdb_syscall_complete_cb complete,
}
static void host_write(CPUState *cs, gdb_syscall_complete_cb complete,
- GuestFD *gf, target_ulong buf, target_ulong len)
+ GuestFD *gf, vaddr buf, uint64_t len)
{
CPUArchState *env G_GNUC_UNUSED = cpu_env(cs);
void *ptr = lock_user(VERIFY_READ, buf, len, 1);
@@ -395,7 +394,7 @@ static void host_flen(CPUState *cs, gdb_syscall_complete_cb complete,
}
static void host_fstat(CPUState *cs, gdb_syscall_complete_cb complete,
- GuestFD *gf, target_ulong addr)
+ GuestFD *gf, vaddr addr)
{
struct stat buf;
int ret;
@@ -410,8 +409,8 @@ static void host_fstat(CPUState *cs, gdb_syscall_complete_cb complete,
}
static void host_stat(CPUState *cs, gdb_syscall_complete_cb complete,
- target_ulong fname, target_ulong fname_len,
- target_ulong addr)
+ vaddr fname, uint64_t fname_len,
+ vaddr addr)
{
CPUArchState *env G_GNUC_UNUSED = cpu_env(cs);
struct stat buf;
@@ -440,7 +439,7 @@ static void host_stat(CPUState *cs, gdb_syscall_complete_cb complete,
}
static void host_remove(CPUState *cs, gdb_syscall_complete_cb complete,
- target_ulong fname, target_ulong fname_len)
+ vaddr fname, uint64_t fname_len)
{
CPUArchState *env G_GNUC_UNUSED = cpu_env(cs);
char *p;
@@ -458,8 +457,8 @@ static void host_remove(CPUState *cs, gdb_syscall_complete_cb complete,
}
static void host_rename(CPUState *cs, gdb_syscall_complete_cb complete,
- target_ulong oname, target_ulong oname_len,
- target_ulong nname, target_ulong nname_len)
+ vaddr oname, uint64_t oname_len,
+ vaddr nname, uint64_t nname_len)
{
CPUArchState *env G_GNUC_UNUSED = cpu_env(cs);
char *ostr, *nstr;
@@ -484,7 +483,7 @@ static void host_rename(CPUState *cs, gdb_syscall_complete_cb complete,
}
static void host_system(CPUState *cs, gdb_syscall_complete_cb complete,
- target_ulong cmd, target_ulong cmd_len)
+ vaddr cmd, uint64_t cmd_len)
{
CPUArchState *env G_GNUC_UNUSED = cpu_env(cs);
char *p;
@@ -502,7 +501,7 @@ static void host_system(CPUState *cs, gdb_syscall_complete_cb complete,
}
static void host_gettimeofday(CPUState *cs, gdb_syscall_complete_cb complete,
- target_ulong tv_addr, target_ulong tz_addr)
+ vaddr tv_addr, vaddr tz_addr)
{
CPUArchState *env G_GNUC_UNUSED = cpu_env(cs);
struct gdb_timeval *p;
@@ -547,10 +546,10 @@ static void host_poll_one(CPUState *cs, gdb_syscall_complete_cb complete,
*/
static void staticfile_read(CPUState *cs, gdb_syscall_complete_cb complete,
- GuestFD *gf, target_ulong buf, target_ulong len)
+ GuestFD *gf, vaddr buf, uint64_t len)
{
CPUArchState *env G_GNUC_UNUSED = cpu_env(cs);
- target_ulong rest = gf->staticfile.len - gf->staticfile.off;
+ uint64_t rest = gf->staticfile.len - gf->staticfile.off;
void *ptr;
if (len > rest) {
@@ -605,7 +604,7 @@ static void staticfile_flen(CPUState *cs, gdb_syscall_complete_cb complete,
*/
static void console_read(CPUState *cs, gdb_syscall_complete_cb complete,
- GuestFD *gf, target_ulong buf, target_ulong len)
+ GuestFD *gf, vaddr buf, uint64_t len)
{
CPUArchState *env G_GNUC_UNUSED = cpu_env(cs);
char *ptr;
@@ -622,7 +621,7 @@ static void console_read(CPUState *cs, gdb_syscall_complete_cb complete,
}
static void console_write(CPUState *cs, gdb_syscall_complete_cb complete,
- GuestFD *gf, target_ulong buf, target_ulong len)
+ GuestFD *gf, vaddr buf, uint64_t len)
{
CPUArchState *env G_GNUC_UNUSED = cpu_env(cs);
char *ptr = lock_user(VERIFY_READ, buf, len, 1);
@@ -638,7 +637,7 @@ static void console_write(CPUState *cs, gdb_syscall_complete_cb complete,
}
static void console_fstat(CPUState *cs, gdb_syscall_complete_cb complete,
- GuestFD *gf, target_ulong addr)
+ GuestFD *gf, vaddr addr)
{
static const struct stat tty_buf = {
.st_mode = 020666, /* S_IFCHR, ugo+rw */
@@ -683,7 +682,7 @@ static void console_poll_one(CPUState *cs, gdb_syscall_complete_cb complete,
*/
void semihost_sys_open(CPUState *cs, gdb_syscall_complete_cb complete,
- target_ulong fname, target_ulong fname_len,
+ vaddr fname, uint64_t fname_len,
int gdb_flags, int mode)
{
if (use_gdb_syscalls()) {
@@ -719,7 +718,7 @@ void semihost_sys_close(CPUState *cs, gdb_syscall_complete_cb complete, int fd)
}
void semihost_sys_read_gf(CPUState *cs, gdb_syscall_complete_cb complete,
- GuestFD *gf, target_ulong buf, target_ulong len)
+ GuestFD *gf, vaddr buf, uint64_t len)
{
/*
* Bound length for 64-bit guests on 32-bit hosts, not overflowing ssize_t.
@@ -748,7 +747,7 @@ void semihost_sys_read_gf(CPUState *cs, gdb_syscall_complete_cb complete,
}
void semihost_sys_read(CPUState *cs, gdb_syscall_complete_cb complete,
- int fd, target_ulong buf, target_ulong len)
+ int fd, vaddr buf, uint64_t len)
{
GuestFD *gf = get_guestfd(fd);
@@ -760,7 +759,7 @@ void semihost_sys_read(CPUState *cs, gdb_syscall_complete_cb complete,
}
void semihost_sys_write_gf(CPUState *cs, gdb_syscall_complete_cb complete,
- GuestFD *gf, target_ulong buf, target_ulong len)
+ GuestFD *gf, vaddr buf, uint64_t len)
{
/*
* Bound length for 64-bit guests on 32-bit hosts, not overflowing ssize_t.
@@ -790,7 +789,7 @@ void semihost_sys_write_gf(CPUState *cs, gdb_syscall_complete_cb complete,
}
void semihost_sys_write(CPUState *cs, gdb_syscall_complete_cb complete,
- int fd, target_ulong buf, target_ulong len)
+ int fd, vaddr buf, uint64_t len)
{
GuestFD *gf = get_guestfd(fd);
@@ -856,7 +855,7 @@ void semihost_sys_isatty(CPUState *cs, gdb_syscall_complete_cb complete, int fd)
void semihost_sys_flen(CPUState *cs, gdb_syscall_complete_cb fstat_cb,
gdb_syscall_complete_cb flen_cb, int fd,
- target_ulong fstat_addr)
+ vaddr fstat_addr)
{
GuestFD *gf = get_guestfd(fd);
@@ -881,7 +880,7 @@ void semihost_sys_flen(CPUState *cs, gdb_syscall_complete_cb fstat_cb,
}
void semihost_sys_fstat(CPUState *cs, gdb_syscall_complete_cb complete,
- int fd, target_ulong addr)
+ int fd, vaddr addr)
{
GuestFD *gf = get_guestfd(fd);
@@ -906,8 +905,8 @@ void semihost_sys_fstat(CPUState *cs, gdb_syscall_complete_cb complete,
}
void semihost_sys_stat(CPUState *cs, gdb_syscall_complete_cb complete,
- target_ulong fname, target_ulong fname_len,
- target_ulong addr)
+ vaddr fname, uint64_t fname_len,
+ vaddr addr)
{
if (use_gdb_syscalls()) {
gdb_stat(cs, complete, fname, fname_len, addr);
@@ -917,7 +916,7 @@ void semihost_sys_stat(CPUState *cs, gdb_syscall_complete_cb complete,
}
void semihost_sys_remove(CPUState *cs, gdb_syscall_complete_cb complete,
- target_ulong fname, target_ulong fname_len)
+ vaddr fname, uint64_t fname_len)
{
if (use_gdb_syscalls()) {
gdb_remove(cs, complete, fname, fname_len);
@@ -927,8 +926,8 @@ void semihost_sys_remove(CPUState *cs, gdb_syscall_complete_cb complete,
}
void semihost_sys_rename(CPUState *cs, gdb_syscall_complete_cb complete,
- target_ulong oname, target_ulong oname_len,
- target_ulong nname, target_ulong nname_len)
+ vaddr oname, uint64_t oname_len,
+ vaddr nname, uint64_t nname_len)
{
if (use_gdb_syscalls()) {
gdb_rename(cs, complete, oname, oname_len, nname, nname_len);
@@ -938,7 +937,7 @@ void semihost_sys_rename(CPUState *cs, gdb_syscall_complete_cb complete,
}
void semihost_sys_system(CPUState *cs, gdb_syscall_complete_cb complete,
- target_ulong cmd, target_ulong cmd_len)
+ vaddr cmd, uint64_t cmd_len)
{
if (use_gdb_syscalls()) {
gdb_system(cs, complete, cmd, cmd_len);
@@ -948,7 +947,7 @@ void semihost_sys_system(CPUState *cs, gdb_syscall_complete_cb complete,
}
void semihost_sys_gettimeofday(CPUState *cs, gdb_syscall_complete_cb complete,
- target_ulong tv_addr, target_ulong tz_addr)
+ vaddr tv_addr, vaddr tz_addr)
{
if (use_gdb_syscalls()) {
gdb_gettimeofday(cs, complete, tv_addr, tz_addr);
diff --git a/subprojects/.gitignore b/subprojects/.gitignore
index f428193..58a29f0 100644
--- a/subprojects/.gitignore
+++ b/subprojects/.gitignore
@@ -8,6 +8,7 @@
/slirp
/anyhow-1.0.98
/arbitrary-int-1.2.7
+/attrs-0.2.9
/bilge-0.2.0
/bilge-impl-0.2.0
/either-1.12.0
@@ -16,7 +17,10 @@
/libc-0.2.162
/proc-macro-error-1.0.4
/proc-macro-error-attr-1.0.4
-/proc-macro2-1.0.84
+/proc-macro2-1.0.95
/quote-1.0.36
/syn-2.0.66
/unicode-ident-1.0.12
+
+# Workaround for Meson v1.9.0 https://github.com/mesonbuild/meson/issues/14948
+/.wraplock
diff --git a/subprojects/attrs-0.2-rs.wrap b/subprojects/attrs-0.2-rs.wrap
new file mode 100644
index 0000000..cd43c91
--- /dev/null
+++ b/subprojects/attrs-0.2-rs.wrap
@@ -0,0 +1,7 @@
+[wrap-file]
+directory = attrs-0.2.9
+source_url = https://crates.io/api/v1/crates/attrs/0.2.9/download
+source_filename = attrs-0.2.9.tar.gz
+source_hash = 2a207d40f43de65285f3de0509bb6cb16bc46098864fce957122bbacce327e5f
+#method = cargo
+patch_directory = attrs-0.2-rs
diff --git a/subprojects/packagefiles/attrs-0.2-rs/meson.build b/subprojects/packagefiles/attrs-0.2-rs/meson.build
new file mode 100644
index 0000000..ee57547
--- /dev/null
+++ b/subprojects/packagefiles/attrs-0.2-rs/meson.build
@@ -0,0 +1,33 @@
+project('attrs-0.2-rs', 'rust',
+ meson_version: '>=1.5.0',
+ version: '0.2.9',
+ license: 'MIT OR Apache-2.0',
+ default_options: [])
+
+subproject('proc-macro2-1-rs', required: true)
+subproject('syn-2-rs', required: true)
+
+proc_macro2_dep = dependency('proc-macro2-1-rs', native: true)
+syn_dep = dependency('syn-2-rs', native: true)
+
+_attrs_rs = static_library(
+ 'attrs',
+ files('src/lib.rs'),
+ gnu_symbol_visibility: 'hidden',
+ override_options: ['rust_std=2021', 'build.rust_std=2021'],
+ rust_abi: 'rust',
+ rust_args: [
+ '--cap-lints', 'allow',
+ ],
+ dependencies: [
+ proc_macro2_dep,
+ syn_dep,
+ ],
+ native: true,
+)
+
+attrs_dep = declare_dependency(
+ link_with: _attrs_rs,
+)
+
+meson.override_dependency('attrs-0.2-rs', attrs_dep, native: true)
diff --git a/target/arm/arm-powerctl.c b/target/arm/arm-powerctl.c
index 20c70c7..a788376 100644
--- a/target/arm/arm-powerctl.c
+++ b/target/arm/arm-powerctl.c
@@ -17,24 +17,12 @@
#include "qemu/main-loop.h"
#include "system/tcg.h"
#include "target/arm/multiprocessing.h"
-
-#ifndef DEBUG_ARM_POWERCTL
-#define DEBUG_ARM_POWERCTL 0
-#endif
-
-#define DPRINTF(fmt, args...) \
- do { \
- if (DEBUG_ARM_POWERCTL) { \
- fprintf(stderr, "[ARM]%s: " fmt , __func__, ##args); \
- } \
- } while (0)
+#include "trace.h"
CPUState *arm_get_cpu_by_id(uint64_t id)
{
CPUState *cpu;
- DPRINTF("cpu %" PRId64 "\n", id);
-
CPU_FOREACH(cpu) {
ARMCPU *armcpu = ARM_CPU(cpu);
@@ -102,9 +90,9 @@ int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t context_id,
assert(bql_locked());
- DPRINTF("cpu %" PRId64 " (EL %d, %s) @ 0x%" PRIx64 " with R0 = 0x%" PRIx64
- "\n", cpuid, target_el, target_aa64 ? "aarch64" : "aarch32", entry,
- context_id);
+ trace_arm_powerctl_set_cpu_on(cpuid, target_el,
+ target_aa64 ? "aarch64" : "aarch32",
+ entry, context_id);
/* requested EL level need to be in the 1 to 3 range */
assert((target_el > 0) && (target_el < 4));
@@ -208,6 +196,8 @@ int arm_set_cpu_on_and_reset(uint64_t cpuid)
assert(bql_locked());
+ trace_arm_powerctl_set_cpu_on_and_reset(cpuid);
+
/* Retrieve the cpu we are powering up */
target_cpu_state = arm_get_cpu_by_id(cpuid);
if (!target_cpu_state) {
@@ -261,7 +251,7 @@ int arm_set_cpu_off(uint64_t cpuid)
assert(bql_locked());
- DPRINTF("cpu %" PRId64 "\n", cpuid);
+ trace_arm_powerctl_set_cpu_off(cpuid);
/* change to the cpu we are powering up */
target_cpu_state = arm_get_cpu_by_id(cpuid);
@@ -297,7 +287,7 @@ int arm_reset_cpu(uint64_t cpuid)
assert(bql_locked());
- DPRINTF("cpu %" PRId64 "\n", cpuid);
+    trace_arm_powerctl_reset_cpu(cpuid);
/* change to the cpu we are resetting */
target_cpu_state = arm_get_cpu_by_id(cpuid);
diff --git a/target/arm/common-semi-target.h b/target/arm/common-semi-target.c
index da51f2d..2b77ce9 100644
--- a/target/arm/common-semi-target.h
+++ b/target/arm/common-semi-target.c
@@ -7,12 +7,12 @@
* SPDX-License-Identifier: GPL-2.0-or-later
*/
-#ifndef TARGET_ARM_COMMON_SEMI_TARGET_H
-#define TARGET_ARM_COMMON_SEMI_TARGET_H
-
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "semihosting/common-semi.h"
#include "target/arm/cpu-qom.h"
-static inline target_ulong common_semi_arg(CPUState *cs, int argno)
+uint64_t common_semi_arg(CPUState *cs, int argno)
{
ARMCPU *cpu = ARM_CPU(cs);
CPUARMState *env = &cpu->env;
@@ -23,7 +23,7 @@ static inline target_ulong common_semi_arg(CPUState *cs, int argno)
}
}
-static inline void common_semi_set_ret(CPUState *cs, target_ulong ret)
+void common_semi_set_ret(CPUState *cs, uint64_t ret)
{
ARMCPU *cpu = ARM_CPU(cs);
CPUARMState *env = &cpu->env;
@@ -34,27 +34,25 @@ static inline void common_semi_set_ret(CPUState *cs, target_ulong ret)
}
}
-static inline bool common_semi_sys_exit_extended(CPUState *cs, int nr)
+bool common_semi_sys_exit_is_extended(CPUState *cs)
{
- return nr == TARGET_SYS_EXIT_EXTENDED || is_a64(cpu_env(cs));
+ return is_a64(cpu_env(cs));
}
-static inline bool is_64bit_semihosting(CPUArchState *env)
+bool is_64bit_semihosting(CPUArchState *env)
{
return is_a64(env);
}
-static inline target_ulong common_semi_stack_bottom(CPUState *cs)
+uint64_t common_semi_stack_bottom(CPUState *cs)
{
ARMCPU *cpu = ARM_CPU(cs);
CPUARMState *env = &cpu->env;
return is_a64(env) ? env->xregs[31] : env->regs[13];
}
-static inline bool common_semi_has_synccache(CPUArchState *env)
+bool common_semi_has_synccache(CPUArchState *env)
{
/* Ok for A64, invalid for A32/T32 */
return is_a64(env);
}
-
-#endif
diff --git a/target/arm/cpregs.h b/target/arm/cpregs.h
index 2a4826f..57fde5f 100644
--- a/target/arm/cpregs.h
+++ b/target/arm/cpregs.h
@@ -22,6 +22,7 @@
#define TARGET_ARM_CPREGS_H
#include "hw/registerfields.h"
+#include "exec/memop.h"
#include "target/arm/kvm-consts.h"
#include "cpu.h"
@@ -174,16 +175,20 @@ enum {
* add a bit to distinguish between secure and non-secure cpregs in the
* hashtable.
*/
-#define CP_REG_NS_SHIFT 29
-#define CP_REG_NS_MASK (1 << CP_REG_NS_SHIFT)
+#define CP_REG_AA32_NS_SHIFT 29
+#define CP_REG_AA32_NS_MASK (1 << CP_REG_AA32_NS_SHIFT)
+
+/* Distinguish 32-bit and 64-bit views of AArch32 system registers. */
+#define CP_REG_AA32_64BIT_SHIFT 15
+#define CP_REG_AA32_64BIT_MASK (1 << CP_REG_AA32_64BIT_SHIFT)
#define ENCODE_CP_REG(cp, is64, ns, crn, crm, opc1, opc2) \
- ((ns) << CP_REG_NS_SHIFT | ((cp) << 16) | ((is64) << 15) | \
- ((crn) << 11) | ((crm) << 7) | ((opc1) << 3) | (opc2))
+ (((ns) << CP_REG_AA32_NS_SHIFT) | \
+ ((is64) << CP_REG_AA32_64BIT_SHIFT) | \
+ ((cp) << 16) | ((crn) << 11) | ((crm) << 7) | ((opc1) << 3) | (opc2))
-#define ENCODE_AA64_CP_REG(cp, crn, crm, op0, op1, op2) \
- (CP_REG_AA64_MASK | \
- ((cp) << CP_REG_ARM_COPROC_SHIFT) | \
+#define ENCODE_AA64_CP_REG(op0, op1, crn, crm, op2) \
+ (CP_REG_AA64_MASK | CP_REG_ARM64_SYSREG | \
((op0) << CP_REG_ARM64_SYSREG_OP0_SHIFT) | \
((op1) << CP_REG_ARM64_SYSREG_OP1_SHIFT) | \
((crn) << CP_REG_ARM64_SYSREG_CRN_SHIFT) | \
@@ -201,14 +206,14 @@ static inline uint32_t kvm_to_cpreg_id(uint64_t kvmid)
cpregid |= CP_REG_AA64_MASK;
} else {
if ((kvmid & CP_REG_SIZE_MASK) == CP_REG_SIZE_U64) {
- cpregid |= (1 << 15);
+ cpregid |= CP_REG_AA32_64BIT_MASK;
}
/*
* KVM is always non-secure so add the NS flag on AArch32 register
* entries.
*/
- cpregid |= 1 << CP_REG_NS_SHIFT;
+ cpregid |= CP_REG_AA32_NS_MASK;
}
return cpregid;
}
@@ -225,8 +230,8 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
kvmid = cpregid & ~CP_REG_AA64_MASK;
kvmid |= CP_REG_SIZE_U64 | CP_REG_ARM64;
} else {
- kvmid = cpregid & ~(1 << 15);
- if (cpregid & (1 << 15)) {
+ kvmid = cpregid & ~CP_REG_AA32_64BIT_MASK;
+ if (cpregid & CP_REG_AA32_64BIT_MASK) {
kvmid |= CP_REG_SIZE_U64 | CP_REG_ARM;
} else {
kvmid |= CP_REG_SIZE_U32 | CP_REG_ARM;
@@ -866,15 +871,15 @@ typedef struct ARMCPRegInfo ARMCPRegInfo;
* Access functions for coprocessor registers. These cannot fail and
* may not raise exceptions.
*/
-typedef uint64_t CPReadFn(CPUARMState *env, const ARMCPRegInfo *opaque);
-typedef void CPWriteFn(CPUARMState *env, const ARMCPRegInfo *opaque,
+typedef uint64_t CPReadFn(CPUARMState *env, const ARMCPRegInfo *ri);
+typedef void CPWriteFn(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value);
/* Access permission check functions for coprocessor registers. */
typedef CPAccessResult CPAccessFn(CPUARMState *env,
- const ARMCPRegInfo *opaque,
+ const ARMCPRegInfo *ri,
bool isread);
/* Hook function for register reset */
-typedef void CPResetFn(CPUARMState *env, const ARMCPRegInfo *opaque);
+typedef void CPResetFn(CPUARMState *env, const ARMCPRegInfo *ri);
#define CP_ANY 0xff
@@ -932,11 +937,19 @@ struct ARMCPRegInfo {
uint32_t nv2_redirect_offset;
/*
- * The opaque pointer passed to define_arm_cp_regs_with_opaque() when
- * this register was defined: can be used to hand data through to the
- * register read/write functions, since they are passed the ARMCPRegInfo*.
+ * With VHE, with E2H, at EL2, access to this EL0/EL1 reg redirects
+ * to the EL2 reg with the specified key.
*/
- void *opaque;
+ uint32_t vhe_redir_to_el2;
+
+ /*
+ * For VHE. Before registration, this field holds the key for an
+ * EL02/EL12 reg to be created to point back to this EL0/EL1 reg.
+ * After registration, this field is set only on the EL02/EL12 reg
+     * and points back to the EL0/EL1 reg for redirection with E2H.
+ */
+ uint32_t vhe_redir_to_el01;
+
/*
* Value of this register, if it is ARM_CP_CONST. Otherwise, if
* fieldoffset is non-zero, the reset value of the register.
@@ -1004,52 +1017,17 @@ struct ARMCPRegInfo {
* fieldoffset is 0 then no reset will be done.
*/
CPResetFn *resetfn;
-
- /*
- * "Original" readfn, writefn, accessfn.
- * For ARMv8.1-VHE register aliases, we overwrite the read/write
- * accessor functions of various EL1/EL0 to perform the runtime
- * check for which sysreg should actually be modified, and then
- * forwards the operation. Before overwriting the accessors,
- * the original function is copied here, so that accesses that
- * really do go to the EL1/EL0 version proceed normally.
- * (The corresponding EL2 register is linked via opaque.)
- */
- CPReadFn *orig_readfn;
- CPWriteFn *orig_writefn;
- CPAccessFn *orig_accessfn;
};
-/*
- * Macros which are lvalues for the field in CPUARMState for the
- * ARMCPRegInfo *ri.
- */
-#define CPREG_FIELD32(env, ri) \
- (*(uint32_t *)((char *)(env) + (ri)->fieldoffset))
-#define CPREG_FIELD64(env, ri) \
- (*(uint64_t *)((char *)(env) + (ri)->fieldoffset))
+void define_one_arm_cp_reg(ARMCPU *cpu, const ARMCPRegInfo *regs);
+void define_arm_cp_regs_len(ARMCPU *cpu, const ARMCPRegInfo *regs, size_t len);
-void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, const ARMCPRegInfo *reg,
- void *opaque);
-
-static inline void define_one_arm_cp_reg(ARMCPU *cpu, const ARMCPRegInfo *regs)
-{
- define_one_arm_cp_reg_with_opaque(cpu, regs, NULL);
-}
-
-void define_arm_cp_regs_with_opaque_len(ARMCPU *cpu, const ARMCPRegInfo *regs,
- void *opaque, size_t len);
-
-#define define_arm_cp_regs_with_opaque(CPU, REGS, OPAQUE) \
- do { \
- QEMU_BUILD_BUG_ON(ARRAY_SIZE(REGS) == 0); \
- define_arm_cp_regs_with_opaque_len(CPU, REGS, OPAQUE, \
- ARRAY_SIZE(REGS)); \
+#define define_arm_cp_regs(CPU, REGS) \
+ do { \
+ QEMU_BUILD_BUG_ON(ARRAY_SIZE(REGS) == 0); \
+ define_arm_cp_regs_len(CPU, REGS, ARRAY_SIZE(REGS)); \
} while (0)
-#define define_arm_cp_regs(CPU, REGS) \
- define_arm_cp_regs_with_opaque(CPU, REGS, NULL)
-
const ARMCPRegInfo *get_arm_cp_reginfo(GHashTable *cpregs, uint32_t encoded_cp);
/*
@@ -1100,15 +1078,16 @@ void raw_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value);
* CPResetFn that does nothing, for use if no reset is required even
* if fieldoffset is non zero.
*/
-void arm_cp_reset_ignore(CPUARMState *env, const ARMCPRegInfo *opaque);
+void arm_cp_reset_ignore(CPUARMState *env, const ARMCPRegInfo *ri);
/*
- * Return true if this reginfo struct's field in the cpu state struct
- * is 64 bits wide.
+ * Return MO_32 if the field in CPUARMState is uint32_t or
+ * MO_64 if the field in CPUARMState is uint64_t.
*/
-static inline bool cpreg_field_is_64bit(const ARMCPRegInfo *ri)
+static inline MemOp cpreg_field_type(const ARMCPRegInfo *ri)
{
- return (ri->state == ARM_CP_STATE_AA64) || (ri->type & ARM_CP_64BIT);
+ return (ri->state == ARM_CP_STATE_AA64 || (ri->type & ARM_CP_64BIT)
+ ? MO_64 : MO_32);
}
static inline bool cp_access_ok(int current_el,
@@ -1168,7 +1147,7 @@ static inline bool arm_cpreg_traps_in_nv(const ARMCPRegInfo *ri)
* means that the right set of registers is exactly those where
* the opc1 field is 4 or 5. (You can see this also in the assert
* we do that the opc1 field and the permissions mask line up in
- * define_one_arm_cp_reg_with_opaque().)
+ * define_one_arm_cp_reg().)
* Checking the opc1 field is easier for us and avoids the problem
* that we do not consistently use the right architectural names
* for all sysregs, since we treat the name field as largely for debug.
diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h
index 512eeaf..602f6a8 100644
--- a/target/arm/cpu-features.h
+++ b/target/arm/cpu-features.h
@@ -26,6 +26,421 @@
#include "cpu-sysregs.h"
/*
+ * System register ID fields.
+ */
+FIELD(CLIDR_EL1, CTYPE1, 0, 3)
+FIELD(CLIDR_EL1, CTYPE2, 3, 3)
+FIELD(CLIDR_EL1, CTYPE3, 6, 3)
+FIELD(CLIDR_EL1, CTYPE4, 9, 3)
+FIELD(CLIDR_EL1, CTYPE5, 12, 3)
+FIELD(CLIDR_EL1, CTYPE6, 15, 3)
+FIELD(CLIDR_EL1, CTYPE7, 18, 3)
+FIELD(CLIDR_EL1, LOUIS, 21, 3)
+FIELD(CLIDR_EL1, LOC, 24, 3)
+FIELD(CLIDR_EL1, LOUU, 27, 3)
+FIELD(CLIDR_EL1, ICB, 30, 3)
+
+/* When FEAT_CCIDX is implemented */
+FIELD(CCSIDR_EL1, CCIDX_LINESIZE, 0, 3)
+FIELD(CCSIDR_EL1, CCIDX_ASSOCIATIVITY, 3, 21)
+FIELD(CCSIDR_EL1, CCIDX_NUMSETS, 32, 24)
+
+/* When FEAT_CCIDX is not implemented */
+FIELD(CCSIDR_EL1, LINESIZE, 0, 3)
+FIELD(CCSIDR_EL1, ASSOCIATIVITY, 3, 10)
+FIELD(CCSIDR_EL1, NUMSETS, 13, 15)
+
+FIELD(CTR_EL0, IMINLINE, 0, 4)
+FIELD(CTR_EL0, L1IP, 14, 2)
+FIELD(CTR_EL0, DMINLINE, 16, 4)
+FIELD(CTR_EL0, ERG, 20, 4)
+FIELD(CTR_EL0, CWG, 24, 4)
+FIELD(CTR_EL0, IDC, 28, 1)
+FIELD(CTR_EL0, DIC, 29, 1)
+FIELD(CTR_EL0, TMINLINE, 32, 6)
+
+FIELD(MIDR_EL1, REVISION, 0, 4)
+FIELD(MIDR_EL1, PARTNUM, 4, 12)
+FIELD(MIDR_EL1, ARCHITECTURE, 16, 4)
+FIELD(MIDR_EL1, VARIANT, 20, 4)
+FIELD(MIDR_EL1, IMPLEMENTER, 24, 8)
+
+FIELD(ID_ISAR0, SWAP, 0, 4)
+FIELD(ID_ISAR0, BITCOUNT, 4, 4)
+FIELD(ID_ISAR0, BITFIELD, 8, 4)
+FIELD(ID_ISAR0, CMPBRANCH, 12, 4)
+FIELD(ID_ISAR0, COPROC, 16, 4)
+FIELD(ID_ISAR0, DEBUG, 20, 4)
+FIELD(ID_ISAR0, DIVIDE, 24, 4)
+
+FIELD(ID_ISAR1, ENDIAN, 0, 4)
+FIELD(ID_ISAR1, EXCEPT, 4, 4)
+FIELD(ID_ISAR1, EXCEPT_AR, 8, 4)
+FIELD(ID_ISAR1, EXTEND, 12, 4)
+FIELD(ID_ISAR1, IFTHEN, 16, 4)
+FIELD(ID_ISAR1, IMMEDIATE, 20, 4)
+FIELD(ID_ISAR1, INTERWORK, 24, 4)
+FIELD(ID_ISAR1, JAZELLE, 28, 4)
+
+FIELD(ID_ISAR2, LOADSTORE, 0, 4)
+FIELD(ID_ISAR2, MEMHINT, 4, 4)
+FIELD(ID_ISAR2, MULTIACCESSINT, 8, 4)
+FIELD(ID_ISAR2, MULT, 12, 4)
+FIELD(ID_ISAR2, MULTS, 16, 4)
+FIELD(ID_ISAR2, MULTU, 20, 4)
+FIELD(ID_ISAR2, PSR_AR, 24, 4)
+FIELD(ID_ISAR2, REVERSAL, 28, 4)
+
+FIELD(ID_ISAR3, SATURATE, 0, 4)
+FIELD(ID_ISAR3, SIMD, 4, 4)
+FIELD(ID_ISAR3, SVC, 8, 4)
+FIELD(ID_ISAR3, SYNCHPRIM, 12, 4)
+FIELD(ID_ISAR3, TABBRANCH, 16, 4)
+FIELD(ID_ISAR3, T32COPY, 20, 4)
+FIELD(ID_ISAR3, TRUENOP, 24, 4)
+FIELD(ID_ISAR3, T32EE, 28, 4)
+
+FIELD(ID_ISAR4, UNPRIV, 0, 4)
+FIELD(ID_ISAR4, WITHSHIFTS, 4, 4)
+FIELD(ID_ISAR4, WRITEBACK, 8, 4)
+FIELD(ID_ISAR4, SMC, 12, 4)
+FIELD(ID_ISAR4, BARRIER, 16, 4)
+FIELD(ID_ISAR4, SYNCHPRIM_FRAC, 20, 4)
+FIELD(ID_ISAR4, PSR_M, 24, 4)
+FIELD(ID_ISAR4, SWP_FRAC, 28, 4)
+
+FIELD(ID_ISAR5, SEVL, 0, 4)
+FIELD(ID_ISAR5, AES, 4, 4)
+FIELD(ID_ISAR5, SHA1, 8, 4)
+FIELD(ID_ISAR5, SHA2, 12, 4)
+FIELD(ID_ISAR5, CRC32, 16, 4)
+FIELD(ID_ISAR5, RDM, 24, 4)
+FIELD(ID_ISAR5, VCMA, 28, 4)
+
+FIELD(ID_ISAR6, JSCVT, 0, 4)
+FIELD(ID_ISAR6, DP, 4, 4)
+FIELD(ID_ISAR6, FHM, 8, 4)
+FIELD(ID_ISAR6, SB, 12, 4)
+FIELD(ID_ISAR6, SPECRES, 16, 4)
+FIELD(ID_ISAR6, BF16, 20, 4)
+FIELD(ID_ISAR6, I8MM, 24, 4)
+
+FIELD(ID_MMFR0, VMSA, 0, 4)
+FIELD(ID_MMFR0, PMSA, 4, 4)
+FIELD(ID_MMFR0, OUTERSHR, 8, 4)
+FIELD(ID_MMFR0, SHARELVL, 12, 4)
+FIELD(ID_MMFR0, TCM, 16, 4)
+FIELD(ID_MMFR0, AUXREG, 20, 4)
+FIELD(ID_MMFR0, FCSE, 24, 4)
+FIELD(ID_MMFR0, INNERSHR, 28, 4)
+
+FIELD(ID_MMFR1, L1HVDVA, 0, 4)
+FIELD(ID_MMFR1, L1UNIVA, 4, 4)
+FIELD(ID_MMFR1, L1HVDSW, 8, 4)
+FIELD(ID_MMFR1, L1UNISW, 12, 4)
+FIELD(ID_MMFR1, L1HVD, 16, 4)
+FIELD(ID_MMFR1, L1UNI, 20, 4)
+FIELD(ID_MMFR1, L1TSTCLN, 24, 4)
+FIELD(ID_MMFR1, BPRED, 28, 4)
+
+FIELD(ID_MMFR2, L1HVDFG, 0, 4)
+FIELD(ID_MMFR2, L1HVDBG, 4, 4)
+FIELD(ID_MMFR2, L1HVDRNG, 8, 4)
+FIELD(ID_MMFR2, HVDTLB, 12, 4)
+FIELD(ID_MMFR2, UNITLB, 16, 4)
+FIELD(ID_MMFR2, MEMBARR, 20, 4)
+FIELD(ID_MMFR2, WFISTALL, 24, 4)
+FIELD(ID_MMFR2, HWACCFLG, 28, 4)
+
+FIELD(ID_MMFR3, CMAINTVA, 0, 4)
+FIELD(ID_MMFR3, CMAINTSW, 4, 4)
+FIELD(ID_MMFR3, BPMAINT, 8, 4)
+FIELD(ID_MMFR3, MAINTBCST, 12, 4)
+FIELD(ID_MMFR3, PAN, 16, 4)
+FIELD(ID_MMFR3, COHWALK, 20, 4)
+FIELD(ID_MMFR3, CMEMSZ, 24, 4)
+FIELD(ID_MMFR3, SUPERSEC, 28, 4)
+
+FIELD(ID_MMFR4, SPECSEI, 0, 4)
+FIELD(ID_MMFR4, AC2, 4, 4)
+FIELD(ID_MMFR4, XNX, 8, 4)
+FIELD(ID_MMFR4, CNP, 12, 4)
+FIELD(ID_MMFR4, HPDS, 16, 4)
+FIELD(ID_MMFR4, LSM, 20, 4)
+FIELD(ID_MMFR4, CCIDX, 24, 4)
+FIELD(ID_MMFR4, EVT, 28, 4)
+
+FIELD(ID_MMFR5, ETS, 0, 4)
+FIELD(ID_MMFR5, NTLBPA, 4, 4)
+
+FIELD(ID_PFR0, STATE0, 0, 4)
+FIELD(ID_PFR0, STATE1, 4, 4)
+FIELD(ID_PFR0, STATE2, 8, 4)
+FIELD(ID_PFR0, STATE3, 12, 4)
+FIELD(ID_PFR0, CSV2, 16, 4)
+FIELD(ID_PFR0, AMU, 20, 4)
+FIELD(ID_PFR0, DIT, 24, 4)
+FIELD(ID_PFR0, RAS, 28, 4)
+
+FIELD(ID_PFR1, PROGMOD, 0, 4)
+FIELD(ID_PFR1, SECURITY, 4, 4)
+FIELD(ID_PFR1, MPROGMOD, 8, 4)
+FIELD(ID_PFR1, VIRTUALIZATION, 12, 4)
+FIELD(ID_PFR1, GENTIMER, 16, 4)
+FIELD(ID_PFR1, SEC_FRAC, 20, 4)
+FIELD(ID_PFR1, VIRT_FRAC, 24, 4)
+FIELD(ID_PFR1, GIC, 28, 4)
+
+FIELD(ID_PFR2, CSV3, 0, 4)
+FIELD(ID_PFR2, SSBS, 4, 4)
+FIELD(ID_PFR2, RAS_FRAC, 8, 4)
+
+FIELD(ID_AA64ISAR0, AES, 4, 4)
+FIELD(ID_AA64ISAR0, SHA1, 8, 4)
+FIELD(ID_AA64ISAR0, SHA2, 12, 4)
+FIELD(ID_AA64ISAR0, CRC32, 16, 4)
+FIELD(ID_AA64ISAR0, ATOMIC, 20, 4)
+FIELD(ID_AA64ISAR0, TME, 24, 4)
+FIELD(ID_AA64ISAR0, RDM, 28, 4)
+FIELD(ID_AA64ISAR0, SHA3, 32, 4)
+FIELD(ID_AA64ISAR0, SM3, 36, 4)
+FIELD(ID_AA64ISAR0, SM4, 40, 4)
+FIELD(ID_AA64ISAR0, DP, 44, 4)
+FIELD(ID_AA64ISAR0, FHM, 48, 4)
+FIELD(ID_AA64ISAR0, TS, 52, 4)
+FIELD(ID_AA64ISAR0, TLB, 56, 4)
+FIELD(ID_AA64ISAR0, RNDR, 60, 4)
+
+FIELD(ID_AA64ISAR1, DPB, 0, 4)
+FIELD(ID_AA64ISAR1, APA, 4, 4)
+FIELD(ID_AA64ISAR1, API, 8, 4)
+FIELD(ID_AA64ISAR1, JSCVT, 12, 4)
+FIELD(ID_AA64ISAR1, FCMA, 16, 4)
+FIELD(ID_AA64ISAR1, LRCPC, 20, 4)
+FIELD(ID_AA64ISAR1, GPA, 24, 4)
+FIELD(ID_AA64ISAR1, GPI, 28, 4)
+FIELD(ID_AA64ISAR1, FRINTTS, 32, 4)
+FIELD(ID_AA64ISAR1, SB, 36, 4)
+FIELD(ID_AA64ISAR1, SPECRES, 40, 4)
+FIELD(ID_AA64ISAR1, BF16, 44, 4)
+FIELD(ID_AA64ISAR1, DGH, 48, 4)
+FIELD(ID_AA64ISAR1, I8MM, 52, 4)
+FIELD(ID_AA64ISAR1, XS, 56, 4)
+FIELD(ID_AA64ISAR1, LS64, 60, 4)
+
+FIELD(ID_AA64ISAR2, WFXT, 0, 4)
+FIELD(ID_AA64ISAR2, RPRES, 4, 4)
+FIELD(ID_AA64ISAR2, GPA3, 8, 4)
+FIELD(ID_AA64ISAR2, APA3, 12, 4)
+FIELD(ID_AA64ISAR2, MOPS, 16, 4)
+FIELD(ID_AA64ISAR2, BC, 20, 4)
+FIELD(ID_AA64ISAR2, PAC_FRAC, 24, 4)
+FIELD(ID_AA64ISAR2, CLRBHB, 28, 4)
+FIELD(ID_AA64ISAR2, SYSREG_128, 32, 4)
+FIELD(ID_AA64ISAR2, SYSINSTR_128, 36, 4)
+FIELD(ID_AA64ISAR2, PRFMSLC, 40, 4)
+FIELD(ID_AA64ISAR2, RPRFM, 48, 4)
+FIELD(ID_AA64ISAR2, CSSC, 52, 4)
+FIELD(ID_AA64ISAR2, LUT, 56, 4)
+FIELD(ID_AA64ISAR2, ATS1A, 60, 4)
+
+FIELD(ID_AA64PFR0, EL0, 0, 4)
+FIELD(ID_AA64PFR0, EL1, 4, 4)
+FIELD(ID_AA64PFR0, EL2, 8, 4)
+FIELD(ID_AA64PFR0, EL3, 12, 4)
+FIELD(ID_AA64PFR0, FP, 16, 4)
+FIELD(ID_AA64PFR0, ADVSIMD, 20, 4)
+FIELD(ID_AA64PFR0, GIC, 24, 4)
+FIELD(ID_AA64PFR0, RAS, 28, 4)
+FIELD(ID_AA64PFR0, SVE, 32, 4)
+FIELD(ID_AA64PFR0, SEL2, 36, 4)
+FIELD(ID_AA64PFR0, MPAM, 40, 4)
+FIELD(ID_AA64PFR0, AMU, 44, 4)
+FIELD(ID_AA64PFR0, DIT, 48, 4)
+FIELD(ID_AA64PFR0, RME, 52, 4)
+FIELD(ID_AA64PFR0, CSV2, 56, 4)
+FIELD(ID_AA64PFR0, CSV3, 60, 4)
+
+FIELD(ID_AA64PFR1, BT, 0, 4)
+FIELD(ID_AA64PFR1, SSBS, 4, 4)
+FIELD(ID_AA64PFR1, MTE, 8, 4)
+FIELD(ID_AA64PFR1, RAS_FRAC, 12, 4)
+FIELD(ID_AA64PFR1, MPAM_FRAC, 16, 4)
+FIELD(ID_AA64PFR1, SME, 24, 4)
+FIELD(ID_AA64PFR1, RNDR_TRAP, 28, 4)
+FIELD(ID_AA64PFR1, CSV2_FRAC, 32, 4)
+FIELD(ID_AA64PFR1, NMI, 36, 4)
+FIELD(ID_AA64PFR1, MTE_FRAC, 40, 4)
+FIELD(ID_AA64PFR1, GCS, 44, 4)
+FIELD(ID_AA64PFR1, THE, 48, 4)
+FIELD(ID_AA64PFR1, MTEX, 52, 4)
+FIELD(ID_AA64PFR1, DF2, 56, 4)
+FIELD(ID_AA64PFR1, PFAR, 60, 4)
+
+FIELD(ID_AA64PFR2, MTEPERM, 0, 4)
+FIELD(ID_AA64PFR2, MTESTOREONLY, 4, 4)
+FIELD(ID_AA64PFR2, MTEFAR, 8, 4)
+FIELD(ID_AA64PFR2, FPMR, 32, 4)
+
+FIELD(ID_AA64MMFR0, PARANGE, 0, 4)
+FIELD(ID_AA64MMFR0, ASIDBITS, 4, 4)
+FIELD(ID_AA64MMFR0, BIGEND, 8, 4)
+FIELD(ID_AA64MMFR0, SNSMEM, 12, 4)
+FIELD(ID_AA64MMFR0, BIGENDEL0, 16, 4)
+FIELD(ID_AA64MMFR0, TGRAN16, 20, 4)
+FIELD(ID_AA64MMFR0, TGRAN64, 24, 4)
+FIELD(ID_AA64MMFR0, TGRAN4, 28, 4)
+FIELD(ID_AA64MMFR0, TGRAN16_2, 32, 4)
+FIELD(ID_AA64MMFR0, TGRAN64_2, 36, 4)
+FIELD(ID_AA64MMFR0, TGRAN4_2, 40, 4)
+FIELD(ID_AA64MMFR0, EXS, 44, 4)
+FIELD(ID_AA64MMFR0, FGT, 56, 4)
+FIELD(ID_AA64MMFR0, ECV, 60, 4)
+
+FIELD(ID_AA64MMFR1, HAFDBS, 0, 4)
+FIELD(ID_AA64MMFR1, VMIDBITS, 4, 4)
+FIELD(ID_AA64MMFR1, VH, 8, 4)
+FIELD(ID_AA64MMFR1, HPDS, 12, 4)
+FIELD(ID_AA64MMFR1, LO, 16, 4)
+FIELD(ID_AA64MMFR1, PAN, 20, 4)
+FIELD(ID_AA64MMFR1, SPECSEI, 24, 4)
+FIELD(ID_AA64MMFR1, XNX, 28, 4)
+FIELD(ID_AA64MMFR1, TWED, 32, 4)
+FIELD(ID_AA64MMFR1, ETS, 36, 4)
+FIELD(ID_AA64MMFR1, HCX, 40, 4)
+FIELD(ID_AA64MMFR1, AFP, 44, 4)
+FIELD(ID_AA64MMFR1, NTLBPA, 48, 4)
+FIELD(ID_AA64MMFR1, TIDCP1, 52, 4)
+FIELD(ID_AA64MMFR1, CMOW, 56, 4)
+FIELD(ID_AA64MMFR1, ECBHB, 60, 4)
+
+FIELD(ID_AA64MMFR2, CNP, 0, 4)
+FIELD(ID_AA64MMFR2, UAO, 4, 4)
+FIELD(ID_AA64MMFR2, LSM, 8, 4)
+FIELD(ID_AA64MMFR2, IESB, 12, 4)
+FIELD(ID_AA64MMFR2, VARANGE, 16, 4)
+FIELD(ID_AA64MMFR2, CCIDX, 20, 4)
+FIELD(ID_AA64MMFR2, NV, 24, 4)
+FIELD(ID_AA64MMFR2, ST, 28, 4)
+FIELD(ID_AA64MMFR2, AT, 32, 4)
+FIELD(ID_AA64MMFR2, IDS, 36, 4)
+FIELD(ID_AA64MMFR2, FWB, 40, 4)
+FIELD(ID_AA64MMFR2, TTL, 48, 4)
+FIELD(ID_AA64MMFR2, BBM, 52, 4)
+FIELD(ID_AA64MMFR2, EVT, 56, 4)
+FIELD(ID_AA64MMFR2, E0PD, 60, 4)
+
+FIELD(ID_AA64MMFR3, TCRX, 0, 4)
+FIELD(ID_AA64MMFR3, SCTLRX, 4, 4)
+FIELD(ID_AA64MMFR3, S1PIE, 8, 4)
+FIELD(ID_AA64MMFR3, S2PIE, 12, 4)
+FIELD(ID_AA64MMFR3, S1POE, 16, 4)
+FIELD(ID_AA64MMFR3, S2POE, 20, 4)
+FIELD(ID_AA64MMFR3, AIE, 24, 4)
+FIELD(ID_AA64MMFR3, MEC, 28, 4)
+FIELD(ID_AA64MMFR3, D128, 32, 4)
+FIELD(ID_AA64MMFR3, D128_2, 36, 4)
+FIELD(ID_AA64MMFR3, SNERR, 40, 4)
+FIELD(ID_AA64MMFR3, ANERR, 44, 4)
+FIELD(ID_AA64MMFR3, SDERR, 52, 4)
+FIELD(ID_AA64MMFR3, ADERR, 56, 4)
+FIELD(ID_AA64MMFR3, SPEC_FPACC, 60, 4)
+
+FIELD(ID_AA64DFR0, DEBUGVER, 0, 4)
+FIELD(ID_AA64DFR0, TRACEVER, 4, 4)
+FIELD(ID_AA64DFR0, PMUVER, 8, 4)
+FIELD(ID_AA64DFR0, BRPS, 12, 4)
+FIELD(ID_AA64DFR0, PMSS, 16, 4)
+FIELD(ID_AA64DFR0, WRPS, 20, 4)
+FIELD(ID_AA64DFR0, SEBEP, 24, 4)
+FIELD(ID_AA64DFR0, CTX_CMPS, 28, 4)
+FIELD(ID_AA64DFR0, PMSVER, 32, 4)
+FIELD(ID_AA64DFR0, DOUBLELOCK, 36, 4)
+FIELD(ID_AA64DFR0, TRACEFILT, 40, 4)
+FIELD(ID_AA64DFR0, TRACEBUFFER, 44, 4)
+FIELD(ID_AA64DFR0, MTPMU, 48, 4)
+FIELD(ID_AA64DFR0, BRBE, 52, 4)
+FIELD(ID_AA64DFR0, EXTTRCBUFF, 56, 4)
+FIELD(ID_AA64DFR0, HPMN0, 60, 4)
+
+FIELD(ID_AA64ZFR0, SVEVER, 0, 4)
+FIELD(ID_AA64ZFR0, AES, 4, 4)
+FIELD(ID_AA64ZFR0, BITPERM, 16, 4)
+FIELD(ID_AA64ZFR0, BFLOAT16, 20, 4)
+FIELD(ID_AA64ZFR0, B16B16, 24, 4)
+FIELD(ID_AA64ZFR0, SHA3, 32, 4)
+FIELD(ID_AA64ZFR0, SM4, 40, 4)
+FIELD(ID_AA64ZFR0, I8MM, 44, 4)
+FIELD(ID_AA64ZFR0, F32MM, 52, 4)
+FIELD(ID_AA64ZFR0, F64MM, 56, 4)
+
+FIELD(ID_AA64SMFR0, F32F32, 32, 1)
+FIELD(ID_AA64SMFR0, BI32I32, 33, 1)
+FIELD(ID_AA64SMFR0, B16F32, 34, 1)
+FIELD(ID_AA64SMFR0, F16F32, 35, 1)
+FIELD(ID_AA64SMFR0, I8I32, 36, 4)
+FIELD(ID_AA64SMFR0, F16F16, 42, 1)
+FIELD(ID_AA64SMFR0, B16B16, 43, 1)
+FIELD(ID_AA64SMFR0, I16I32, 44, 4)
+FIELD(ID_AA64SMFR0, F64F64, 48, 1)
+FIELD(ID_AA64SMFR0, I16I64, 52, 4)
+FIELD(ID_AA64SMFR0, SMEVER, 56, 4)
+FIELD(ID_AA64SMFR0, FA64, 63, 1)
+
+FIELD(ID_DFR0, COPDBG, 0, 4)
+FIELD(ID_DFR0, COPSDBG, 4, 4)
+FIELD(ID_DFR0, MMAPDBG, 8, 4)
+FIELD(ID_DFR0, COPTRC, 12, 4)
+FIELD(ID_DFR0, MMAPTRC, 16, 4)
+FIELD(ID_DFR0, MPROFDBG, 20, 4)
+FIELD(ID_DFR0, PERFMON, 24, 4)
+FIELD(ID_DFR0, TRACEFILT, 28, 4)
+
+FIELD(ID_DFR1, MTPMU, 0, 4)
+FIELD(ID_DFR1, HPMN0, 4, 4)
+
+FIELD(DBGDIDR, SE_IMP, 12, 1)
+FIELD(DBGDIDR, NSUHD_IMP, 14, 1)
+FIELD(DBGDIDR, VERSION, 16, 4)
+FIELD(DBGDIDR, CTX_CMPS, 20, 4)
+FIELD(DBGDIDR, BRPS, 24, 4)
+FIELD(DBGDIDR, WRPS, 28, 4)
+
+FIELD(DBGDEVID, PCSAMPLE, 0, 4)
+FIELD(DBGDEVID, WPADDRMASK, 4, 4)
+FIELD(DBGDEVID, BPADDRMASK, 8, 4)
+FIELD(DBGDEVID, VECTORCATCH, 12, 4)
+FIELD(DBGDEVID, VIRTEXTNS, 16, 4)
+FIELD(DBGDEVID, DOUBLELOCK, 20, 4)
+FIELD(DBGDEVID, AUXREGS, 24, 4)
+FIELD(DBGDEVID, CIDMASK, 28, 4)
+
+FIELD(DBGDEVID1, PCSROFFSET, 0, 4)
+
+FIELD(MVFR0, SIMDREG, 0, 4)
+FIELD(MVFR0, FPSP, 4, 4)
+FIELD(MVFR0, FPDP, 8, 4)
+FIELD(MVFR0, FPTRAP, 12, 4)
+FIELD(MVFR0, FPDIVIDE, 16, 4)
+FIELD(MVFR0, FPSQRT, 20, 4)
+FIELD(MVFR0, FPSHVEC, 24, 4)
+FIELD(MVFR0, FPROUND, 28, 4)
+
+FIELD(MVFR1, FPFTZ, 0, 4)
+FIELD(MVFR1, FPDNAN, 4, 4)
+FIELD(MVFR1, SIMDLS, 8, 4) /* A-profile only */
+FIELD(MVFR1, SIMDINT, 12, 4) /* A-profile only */
+FIELD(MVFR1, SIMDSP, 16, 4) /* A-profile only */
+FIELD(MVFR1, SIMDHP, 20, 4) /* A-profile only */
+FIELD(MVFR1, MVE, 8, 4) /* M-profile only */
+FIELD(MVFR1, FP16, 20, 4) /* M-profile only */
+FIELD(MVFR1, FPHP, 24, 4)
+FIELD(MVFR1, SIMDFMAC, 28, 4)
+
+FIELD(MVFR2, SIMDMISC, 0, 4)
+FIELD(MVFR2, FPMISC, 4, 4)
+
+/*
* Naming convention for isar_feature functions:
* Functions which test 32-bit ID registers should have _aa32_ in
* their name. Functions which test 64-bit ID registers should have
diff --git a/target/arm/cpu-sysregs.h.inc b/target/arm/cpu-sysregs.h.inc
index f48a9da..2bb2861 100644
--- a/target/arm/cpu-sysregs.h.inc
+++ b/target/arm/cpu-sysregs.h.inc
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
DEF(ID_AA64PFR0_EL1, 3, 0, 0, 4, 0)
DEF(ID_AA64PFR1_EL1, 3, 0, 0, 4, 1)
+DEF(ID_AA64PFR2_EL1, 3, 0, 0, 4, 2)
DEF(ID_AA64SMFR0_EL1, 3, 0, 0, 4, 5)
DEF(ID_AA64DFR0_EL1, 3, 0, 0, 5, 0)
DEF(ID_AA64DFR1_EL1, 3, 0, 0, 5, 1)
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index c65af7e..30e29fd 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -52,6 +52,8 @@
#include "target/arm/cpu-qom.h"
#include "target/arm/gtimer.h"
+#include "trace.h"
+
static void arm_cpu_set_pc(CPUState *cs, vaddr value)
{
ARMCPU *cpu = ARM_CPU(cs);
@@ -192,14 +194,8 @@ static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque)
* This is basically only used for fields in non-core coprocessors
* (like the pxa2xx ones).
*/
- if (!ri->fieldoffset) {
- return;
- }
-
- if (cpreg_field_is_64bit(ri)) {
- CPREG_FIELD64(&cpu->env, ri) = ri->resetvalue;
- } else {
- CPREG_FIELD32(&cpu->env, ri) = ri->resetvalue;
+ if (ri->fieldoffset) {
+ raw_write(&cpu->env, ri, ri->resetvalue);
}
}
@@ -231,6 +227,8 @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
ARMCPUClass *acc = ARM_CPU_GET_CLASS(obj);
CPUARMState *env = &cpu->env;
+ trace_arm_cpu_reset(arm_cpu_mp_affinity(cpu));
+
if (acc->parent_phases.hold) {
acc->parent_phases.hold(obj, type);
}
@@ -580,6 +578,8 @@ void arm_emulate_firmware_reset(CPUState *cpustate, int target_el)
bool have_el3 = arm_feature(env, ARM_FEATURE_EL3);
bool have_el2 = arm_feature(env, ARM_FEATURE_EL2);
+ trace_arm_emulate_firmware_reset(arm_cpu_mp_affinity(cpu), target_el);
+
/*
* Check we have the EL we're aiming for. If that is the
* highest implemented EL, then cpu_reset has already done
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 1c0deb7..2b9585d 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -1994,416 +1994,6 @@ FIELD(V7M_VPR, P0, 0, 16)
FIELD(V7M_VPR, MASK01, 16, 4)
FIELD(V7M_VPR, MASK23, 20, 4)
-/*
- * System register ID fields.
- */
-FIELD(CLIDR_EL1, CTYPE1, 0, 3)
-FIELD(CLIDR_EL1, CTYPE2, 3, 3)
-FIELD(CLIDR_EL1, CTYPE3, 6, 3)
-FIELD(CLIDR_EL1, CTYPE4, 9, 3)
-FIELD(CLIDR_EL1, CTYPE5, 12, 3)
-FIELD(CLIDR_EL1, CTYPE6, 15, 3)
-FIELD(CLIDR_EL1, CTYPE7, 18, 3)
-FIELD(CLIDR_EL1, LOUIS, 21, 3)
-FIELD(CLIDR_EL1, LOC, 24, 3)
-FIELD(CLIDR_EL1, LOUU, 27, 3)
-FIELD(CLIDR_EL1, ICB, 30, 3)
-
-/* When FEAT_CCIDX is implemented */
-FIELD(CCSIDR_EL1, CCIDX_LINESIZE, 0, 3)
-FIELD(CCSIDR_EL1, CCIDX_ASSOCIATIVITY, 3, 21)
-FIELD(CCSIDR_EL1, CCIDX_NUMSETS, 32, 24)
-
-/* When FEAT_CCIDX is not implemented */
-FIELD(CCSIDR_EL1, LINESIZE, 0, 3)
-FIELD(CCSIDR_EL1, ASSOCIATIVITY, 3, 10)
-FIELD(CCSIDR_EL1, NUMSETS, 13, 15)
-
-FIELD(CTR_EL0, IMINLINE, 0, 4)
-FIELD(CTR_EL0, L1IP, 14, 2)
-FIELD(CTR_EL0, DMINLINE, 16, 4)
-FIELD(CTR_EL0, ERG, 20, 4)
-FIELD(CTR_EL0, CWG, 24, 4)
-FIELD(CTR_EL0, IDC, 28, 1)
-FIELD(CTR_EL0, DIC, 29, 1)
-FIELD(CTR_EL0, TMINLINE, 32, 6)
-
-FIELD(MIDR_EL1, REVISION, 0, 4)
-FIELD(MIDR_EL1, PARTNUM, 4, 12)
-FIELD(MIDR_EL1, ARCHITECTURE, 16, 4)
-FIELD(MIDR_EL1, VARIANT, 20, 4)
-FIELD(MIDR_EL1, IMPLEMENTER, 24, 8)
-
-FIELD(ID_ISAR0, SWAP, 0, 4)
-FIELD(ID_ISAR0, BITCOUNT, 4, 4)
-FIELD(ID_ISAR0, BITFIELD, 8, 4)
-FIELD(ID_ISAR0, CMPBRANCH, 12, 4)
-FIELD(ID_ISAR0, COPROC, 16, 4)
-FIELD(ID_ISAR0, DEBUG, 20, 4)
-FIELD(ID_ISAR0, DIVIDE, 24, 4)
-
-FIELD(ID_ISAR1, ENDIAN, 0, 4)
-FIELD(ID_ISAR1, EXCEPT, 4, 4)
-FIELD(ID_ISAR1, EXCEPT_AR, 8, 4)
-FIELD(ID_ISAR1, EXTEND, 12, 4)
-FIELD(ID_ISAR1, IFTHEN, 16, 4)
-FIELD(ID_ISAR1, IMMEDIATE, 20, 4)
-FIELD(ID_ISAR1, INTERWORK, 24, 4)
-FIELD(ID_ISAR1, JAZELLE, 28, 4)
-
-FIELD(ID_ISAR2, LOADSTORE, 0, 4)
-FIELD(ID_ISAR2, MEMHINT, 4, 4)
-FIELD(ID_ISAR2, MULTIACCESSINT, 8, 4)
-FIELD(ID_ISAR2, MULT, 12, 4)
-FIELD(ID_ISAR2, MULTS, 16, 4)
-FIELD(ID_ISAR2, MULTU, 20, 4)
-FIELD(ID_ISAR2, PSR_AR, 24, 4)
-FIELD(ID_ISAR2, REVERSAL, 28, 4)
-
-FIELD(ID_ISAR3, SATURATE, 0, 4)
-FIELD(ID_ISAR3, SIMD, 4, 4)
-FIELD(ID_ISAR3, SVC, 8, 4)
-FIELD(ID_ISAR3, SYNCHPRIM, 12, 4)
-FIELD(ID_ISAR3, TABBRANCH, 16, 4)
-FIELD(ID_ISAR3, T32COPY, 20, 4)
-FIELD(ID_ISAR3, TRUENOP, 24, 4)
-FIELD(ID_ISAR3, T32EE, 28, 4)
-
-FIELD(ID_ISAR4, UNPRIV, 0, 4)
-FIELD(ID_ISAR4, WITHSHIFTS, 4, 4)
-FIELD(ID_ISAR4, WRITEBACK, 8, 4)
-FIELD(ID_ISAR4, SMC, 12, 4)
-FIELD(ID_ISAR4, BARRIER, 16, 4)
-FIELD(ID_ISAR4, SYNCHPRIM_FRAC, 20, 4)
-FIELD(ID_ISAR4, PSR_M, 24, 4)
-FIELD(ID_ISAR4, SWP_FRAC, 28, 4)
-
-FIELD(ID_ISAR5, SEVL, 0, 4)
-FIELD(ID_ISAR5, AES, 4, 4)
-FIELD(ID_ISAR5, SHA1, 8, 4)
-FIELD(ID_ISAR5, SHA2, 12, 4)
-FIELD(ID_ISAR5, CRC32, 16, 4)
-FIELD(ID_ISAR5, RDM, 24, 4)
-FIELD(ID_ISAR5, VCMA, 28, 4)
-
-FIELD(ID_ISAR6, JSCVT, 0, 4)
-FIELD(ID_ISAR6, DP, 4, 4)
-FIELD(ID_ISAR6, FHM, 8, 4)
-FIELD(ID_ISAR6, SB, 12, 4)
-FIELD(ID_ISAR6, SPECRES, 16, 4)
-FIELD(ID_ISAR6, BF16, 20, 4)
-FIELD(ID_ISAR6, I8MM, 24, 4)
-
-FIELD(ID_MMFR0, VMSA, 0, 4)
-FIELD(ID_MMFR0, PMSA, 4, 4)
-FIELD(ID_MMFR0, OUTERSHR, 8, 4)
-FIELD(ID_MMFR0, SHARELVL, 12, 4)
-FIELD(ID_MMFR0, TCM, 16, 4)
-FIELD(ID_MMFR0, AUXREG, 20, 4)
-FIELD(ID_MMFR0, FCSE, 24, 4)
-FIELD(ID_MMFR0, INNERSHR, 28, 4)
-
-FIELD(ID_MMFR1, L1HVDVA, 0, 4)
-FIELD(ID_MMFR1, L1UNIVA, 4, 4)
-FIELD(ID_MMFR1, L1HVDSW, 8, 4)
-FIELD(ID_MMFR1, L1UNISW, 12, 4)
-FIELD(ID_MMFR1, L1HVD, 16, 4)
-FIELD(ID_MMFR1, L1UNI, 20, 4)
-FIELD(ID_MMFR1, L1TSTCLN, 24, 4)
-FIELD(ID_MMFR1, BPRED, 28, 4)
-
-FIELD(ID_MMFR2, L1HVDFG, 0, 4)
-FIELD(ID_MMFR2, L1HVDBG, 4, 4)
-FIELD(ID_MMFR2, L1HVDRNG, 8, 4)
-FIELD(ID_MMFR2, HVDTLB, 12, 4)
-FIELD(ID_MMFR2, UNITLB, 16, 4)
-FIELD(ID_MMFR2, MEMBARR, 20, 4)
-FIELD(ID_MMFR2, WFISTALL, 24, 4)
-FIELD(ID_MMFR2, HWACCFLG, 28, 4)
-
-FIELD(ID_MMFR3, CMAINTVA, 0, 4)
-FIELD(ID_MMFR3, CMAINTSW, 4, 4)
-FIELD(ID_MMFR3, BPMAINT, 8, 4)
-FIELD(ID_MMFR3, MAINTBCST, 12, 4)
-FIELD(ID_MMFR3, PAN, 16, 4)
-FIELD(ID_MMFR3, COHWALK, 20, 4)
-FIELD(ID_MMFR3, CMEMSZ, 24, 4)
-FIELD(ID_MMFR3, SUPERSEC, 28, 4)
-
-FIELD(ID_MMFR4, SPECSEI, 0, 4)
-FIELD(ID_MMFR4, AC2, 4, 4)
-FIELD(ID_MMFR4, XNX, 8, 4)
-FIELD(ID_MMFR4, CNP, 12, 4)
-FIELD(ID_MMFR4, HPDS, 16, 4)
-FIELD(ID_MMFR4, LSM, 20, 4)
-FIELD(ID_MMFR4, CCIDX, 24, 4)
-FIELD(ID_MMFR4, EVT, 28, 4)
-
-FIELD(ID_MMFR5, ETS, 0, 4)
-FIELD(ID_MMFR5, NTLBPA, 4, 4)
-
-FIELD(ID_PFR0, STATE0, 0, 4)
-FIELD(ID_PFR0, STATE1, 4, 4)
-FIELD(ID_PFR0, STATE2, 8, 4)
-FIELD(ID_PFR0, STATE3, 12, 4)
-FIELD(ID_PFR0, CSV2, 16, 4)
-FIELD(ID_PFR0, AMU, 20, 4)
-FIELD(ID_PFR0, DIT, 24, 4)
-FIELD(ID_PFR0, RAS, 28, 4)
-
-FIELD(ID_PFR1, PROGMOD, 0, 4)
-FIELD(ID_PFR1, SECURITY, 4, 4)
-FIELD(ID_PFR1, MPROGMOD, 8, 4)
-FIELD(ID_PFR1, VIRTUALIZATION, 12, 4)
-FIELD(ID_PFR1, GENTIMER, 16, 4)
-FIELD(ID_PFR1, SEC_FRAC, 20, 4)
-FIELD(ID_PFR1, VIRT_FRAC, 24, 4)
-FIELD(ID_PFR1, GIC, 28, 4)
-
-FIELD(ID_PFR2, CSV3, 0, 4)
-FIELD(ID_PFR2, SSBS, 4, 4)
-FIELD(ID_PFR2, RAS_FRAC, 8, 4)
-
-FIELD(ID_AA64ISAR0, AES, 4, 4)
-FIELD(ID_AA64ISAR0, SHA1, 8, 4)
-FIELD(ID_AA64ISAR0, SHA2, 12, 4)
-FIELD(ID_AA64ISAR0, CRC32, 16, 4)
-FIELD(ID_AA64ISAR0, ATOMIC, 20, 4)
-FIELD(ID_AA64ISAR0, TME, 24, 4)
-FIELD(ID_AA64ISAR0, RDM, 28, 4)
-FIELD(ID_AA64ISAR0, SHA3, 32, 4)
-FIELD(ID_AA64ISAR0, SM3, 36, 4)
-FIELD(ID_AA64ISAR0, SM4, 40, 4)
-FIELD(ID_AA64ISAR0, DP, 44, 4)
-FIELD(ID_AA64ISAR0, FHM, 48, 4)
-FIELD(ID_AA64ISAR0, TS, 52, 4)
-FIELD(ID_AA64ISAR0, TLB, 56, 4)
-FIELD(ID_AA64ISAR0, RNDR, 60, 4)
-
-FIELD(ID_AA64ISAR1, DPB, 0, 4)
-FIELD(ID_AA64ISAR1, APA, 4, 4)
-FIELD(ID_AA64ISAR1, API, 8, 4)
-FIELD(ID_AA64ISAR1, JSCVT, 12, 4)
-FIELD(ID_AA64ISAR1, FCMA, 16, 4)
-FIELD(ID_AA64ISAR1, LRCPC, 20, 4)
-FIELD(ID_AA64ISAR1, GPA, 24, 4)
-FIELD(ID_AA64ISAR1, GPI, 28, 4)
-FIELD(ID_AA64ISAR1, FRINTTS, 32, 4)
-FIELD(ID_AA64ISAR1, SB, 36, 4)
-FIELD(ID_AA64ISAR1, SPECRES, 40, 4)
-FIELD(ID_AA64ISAR1, BF16, 44, 4)
-FIELD(ID_AA64ISAR1, DGH, 48, 4)
-FIELD(ID_AA64ISAR1, I8MM, 52, 4)
-FIELD(ID_AA64ISAR1, XS, 56, 4)
-FIELD(ID_AA64ISAR1, LS64, 60, 4)
-
-FIELD(ID_AA64ISAR2, WFXT, 0, 4)
-FIELD(ID_AA64ISAR2, RPRES, 4, 4)
-FIELD(ID_AA64ISAR2, GPA3, 8, 4)
-FIELD(ID_AA64ISAR2, APA3, 12, 4)
-FIELD(ID_AA64ISAR2, MOPS, 16, 4)
-FIELD(ID_AA64ISAR2, BC, 20, 4)
-FIELD(ID_AA64ISAR2, PAC_FRAC, 24, 4)
-FIELD(ID_AA64ISAR2, CLRBHB, 28, 4)
-FIELD(ID_AA64ISAR2, SYSREG_128, 32, 4)
-FIELD(ID_AA64ISAR2, SYSINSTR_128, 36, 4)
-FIELD(ID_AA64ISAR2, PRFMSLC, 40, 4)
-FIELD(ID_AA64ISAR2, RPRFM, 48, 4)
-FIELD(ID_AA64ISAR2, CSSC, 52, 4)
-FIELD(ID_AA64ISAR2, LUT, 56, 4)
-FIELD(ID_AA64ISAR2, ATS1A, 60, 4)
-
-FIELD(ID_AA64PFR0, EL0, 0, 4)
-FIELD(ID_AA64PFR0, EL1, 4, 4)
-FIELD(ID_AA64PFR0, EL2, 8, 4)
-FIELD(ID_AA64PFR0, EL3, 12, 4)
-FIELD(ID_AA64PFR0, FP, 16, 4)
-FIELD(ID_AA64PFR0, ADVSIMD, 20, 4)
-FIELD(ID_AA64PFR0, GIC, 24, 4)
-FIELD(ID_AA64PFR0, RAS, 28, 4)
-FIELD(ID_AA64PFR0, SVE, 32, 4)
-FIELD(ID_AA64PFR0, SEL2, 36, 4)
-FIELD(ID_AA64PFR0, MPAM, 40, 4)
-FIELD(ID_AA64PFR0, AMU, 44, 4)
-FIELD(ID_AA64PFR0, DIT, 48, 4)
-FIELD(ID_AA64PFR0, RME, 52, 4)
-FIELD(ID_AA64PFR0, CSV2, 56, 4)
-FIELD(ID_AA64PFR0, CSV3, 60, 4)
-
-FIELD(ID_AA64PFR1, BT, 0, 4)
-FIELD(ID_AA64PFR1, SSBS, 4, 4)
-FIELD(ID_AA64PFR1, MTE, 8, 4)
-FIELD(ID_AA64PFR1, RAS_FRAC, 12, 4)
-FIELD(ID_AA64PFR1, MPAM_FRAC, 16, 4)
-FIELD(ID_AA64PFR1, SME, 24, 4)
-FIELD(ID_AA64PFR1, RNDR_TRAP, 28, 4)
-FIELD(ID_AA64PFR1, CSV2_FRAC, 32, 4)
-FIELD(ID_AA64PFR1, NMI, 36, 4)
-FIELD(ID_AA64PFR1, MTE_FRAC, 40, 4)
-FIELD(ID_AA64PFR1, GCS, 44, 4)
-FIELD(ID_AA64PFR1, THE, 48, 4)
-FIELD(ID_AA64PFR1, MTEX, 52, 4)
-FIELD(ID_AA64PFR1, DF2, 56, 4)
-FIELD(ID_AA64PFR1, PFAR, 60, 4)
-
-FIELD(ID_AA64MMFR0, PARANGE, 0, 4)
-FIELD(ID_AA64MMFR0, ASIDBITS, 4, 4)
-FIELD(ID_AA64MMFR0, BIGEND, 8, 4)
-FIELD(ID_AA64MMFR0, SNSMEM, 12, 4)
-FIELD(ID_AA64MMFR0, BIGENDEL0, 16, 4)
-FIELD(ID_AA64MMFR0, TGRAN16, 20, 4)
-FIELD(ID_AA64MMFR0, TGRAN64, 24, 4)
-FIELD(ID_AA64MMFR0, TGRAN4, 28, 4)
-FIELD(ID_AA64MMFR0, TGRAN16_2, 32, 4)
-FIELD(ID_AA64MMFR0, TGRAN64_2, 36, 4)
-FIELD(ID_AA64MMFR0, TGRAN4_2, 40, 4)
-FIELD(ID_AA64MMFR0, EXS, 44, 4)
-FIELD(ID_AA64MMFR0, FGT, 56, 4)
-FIELD(ID_AA64MMFR0, ECV, 60, 4)
-
-FIELD(ID_AA64MMFR1, HAFDBS, 0, 4)
-FIELD(ID_AA64MMFR1, VMIDBITS, 4, 4)
-FIELD(ID_AA64MMFR1, VH, 8, 4)
-FIELD(ID_AA64MMFR1, HPDS, 12, 4)
-FIELD(ID_AA64MMFR1, LO, 16, 4)
-FIELD(ID_AA64MMFR1, PAN, 20, 4)
-FIELD(ID_AA64MMFR1, SPECSEI, 24, 4)
-FIELD(ID_AA64MMFR1, XNX, 28, 4)
-FIELD(ID_AA64MMFR1, TWED, 32, 4)
-FIELD(ID_AA64MMFR1, ETS, 36, 4)
-FIELD(ID_AA64MMFR1, HCX, 40, 4)
-FIELD(ID_AA64MMFR1, AFP, 44, 4)
-FIELD(ID_AA64MMFR1, NTLBPA, 48, 4)
-FIELD(ID_AA64MMFR1, TIDCP1, 52, 4)
-FIELD(ID_AA64MMFR1, CMOW, 56, 4)
-FIELD(ID_AA64MMFR1, ECBHB, 60, 4)
-
-FIELD(ID_AA64MMFR2, CNP, 0, 4)
-FIELD(ID_AA64MMFR2, UAO, 4, 4)
-FIELD(ID_AA64MMFR2, LSM, 8, 4)
-FIELD(ID_AA64MMFR2, IESB, 12, 4)
-FIELD(ID_AA64MMFR2, VARANGE, 16, 4)
-FIELD(ID_AA64MMFR2, CCIDX, 20, 4)
-FIELD(ID_AA64MMFR2, NV, 24, 4)
-FIELD(ID_AA64MMFR2, ST, 28, 4)
-FIELD(ID_AA64MMFR2, AT, 32, 4)
-FIELD(ID_AA64MMFR2, IDS, 36, 4)
-FIELD(ID_AA64MMFR2, FWB, 40, 4)
-FIELD(ID_AA64MMFR2, TTL, 48, 4)
-FIELD(ID_AA64MMFR2, BBM, 52, 4)
-FIELD(ID_AA64MMFR2, EVT, 56, 4)
-FIELD(ID_AA64MMFR2, E0PD, 60, 4)
-
-FIELD(ID_AA64MMFR3, TCRX, 0, 4)
-FIELD(ID_AA64MMFR3, SCTLRX, 4, 4)
-FIELD(ID_AA64MMFR3, S1PIE, 8, 4)
-FIELD(ID_AA64MMFR3, S2PIE, 12, 4)
-FIELD(ID_AA64MMFR3, S1POE, 16, 4)
-FIELD(ID_AA64MMFR3, S2POE, 20, 4)
-FIELD(ID_AA64MMFR3, AIE, 24, 4)
-FIELD(ID_AA64MMFR3, MEC, 28, 4)
-FIELD(ID_AA64MMFR3, D128, 32, 4)
-FIELD(ID_AA64MMFR3, D128_2, 36, 4)
-FIELD(ID_AA64MMFR3, SNERR, 40, 4)
-FIELD(ID_AA64MMFR3, ANERR, 44, 4)
-FIELD(ID_AA64MMFR3, SDERR, 52, 4)
-FIELD(ID_AA64MMFR3, ADERR, 56, 4)
-FIELD(ID_AA64MMFR3, SPEC_FPACC, 60, 4)
-
-FIELD(ID_AA64DFR0, DEBUGVER, 0, 4)
-FIELD(ID_AA64DFR0, TRACEVER, 4, 4)
-FIELD(ID_AA64DFR0, PMUVER, 8, 4)
-FIELD(ID_AA64DFR0, BRPS, 12, 4)
-FIELD(ID_AA64DFR0, PMSS, 16, 4)
-FIELD(ID_AA64DFR0, WRPS, 20, 4)
-FIELD(ID_AA64DFR0, SEBEP, 24, 4)
-FIELD(ID_AA64DFR0, CTX_CMPS, 28, 4)
-FIELD(ID_AA64DFR0, PMSVER, 32, 4)
-FIELD(ID_AA64DFR0, DOUBLELOCK, 36, 4)
-FIELD(ID_AA64DFR0, TRACEFILT, 40, 4)
-FIELD(ID_AA64DFR0, TRACEBUFFER, 44, 4)
-FIELD(ID_AA64DFR0, MTPMU, 48, 4)
-FIELD(ID_AA64DFR0, BRBE, 52, 4)
-FIELD(ID_AA64DFR0, EXTTRCBUFF, 56, 4)
-FIELD(ID_AA64DFR0, HPMN0, 60, 4)
-
-FIELD(ID_AA64ZFR0, SVEVER, 0, 4)
-FIELD(ID_AA64ZFR0, AES, 4, 4)
-FIELD(ID_AA64ZFR0, BITPERM, 16, 4)
-FIELD(ID_AA64ZFR0, BFLOAT16, 20, 4)
-FIELD(ID_AA64ZFR0, B16B16, 24, 4)
-FIELD(ID_AA64ZFR0, SHA3, 32, 4)
-FIELD(ID_AA64ZFR0, SM4, 40, 4)
-FIELD(ID_AA64ZFR0, I8MM, 44, 4)
-FIELD(ID_AA64ZFR0, F32MM, 52, 4)
-FIELD(ID_AA64ZFR0, F64MM, 56, 4)
-
-FIELD(ID_AA64SMFR0, F32F32, 32, 1)
-FIELD(ID_AA64SMFR0, BI32I32, 33, 1)
-FIELD(ID_AA64SMFR0, B16F32, 34, 1)
-FIELD(ID_AA64SMFR0, F16F32, 35, 1)
-FIELD(ID_AA64SMFR0, I8I32, 36, 4)
-FIELD(ID_AA64SMFR0, F16F16, 42, 1)
-FIELD(ID_AA64SMFR0, B16B16, 43, 1)
-FIELD(ID_AA64SMFR0, I16I32, 44, 4)
-FIELD(ID_AA64SMFR0, F64F64, 48, 1)
-FIELD(ID_AA64SMFR0, I16I64, 52, 4)
-FIELD(ID_AA64SMFR0, SMEVER, 56, 4)
-FIELD(ID_AA64SMFR0, FA64, 63, 1)
-
-FIELD(ID_DFR0, COPDBG, 0, 4)
-FIELD(ID_DFR0, COPSDBG, 4, 4)
-FIELD(ID_DFR0, MMAPDBG, 8, 4)
-FIELD(ID_DFR0, COPTRC, 12, 4)
-FIELD(ID_DFR0, MMAPTRC, 16, 4)
-FIELD(ID_DFR0, MPROFDBG, 20, 4)
-FIELD(ID_DFR0, PERFMON, 24, 4)
-FIELD(ID_DFR0, TRACEFILT, 28, 4)
-
-FIELD(ID_DFR1, MTPMU, 0, 4)
-FIELD(ID_DFR1, HPMN0, 4, 4)
-
-FIELD(DBGDIDR, SE_IMP, 12, 1)
-FIELD(DBGDIDR, NSUHD_IMP, 14, 1)
-FIELD(DBGDIDR, VERSION, 16, 4)
-FIELD(DBGDIDR, CTX_CMPS, 20, 4)
-FIELD(DBGDIDR, BRPS, 24, 4)
-FIELD(DBGDIDR, WRPS, 28, 4)
-
-FIELD(DBGDEVID, PCSAMPLE, 0, 4)
-FIELD(DBGDEVID, WPADDRMASK, 4, 4)
-FIELD(DBGDEVID, BPADDRMASK, 8, 4)
-FIELD(DBGDEVID, VECTORCATCH, 12, 4)
-FIELD(DBGDEVID, VIRTEXTNS, 16, 4)
-FIELD(DBGDEVID, DOUBLELOCK, 20, 4)
-FIELD(DBGDEVID, AUXREGS, 24, 4)
-FIELD(DBGDEVID, CIDMASK, 28, 4)
-
-FIELD(DBGDEVID1, PCSROFFSET, 0, 4)
-
-FIELD(MVFR0, SIMDREG, 0, 4)
-FIELD(MVFR0, FPSP, 4, 4)
-FIELD(MVFR0, FPDP, 8, 4)
-FIELD(MVFR0, FPTRAP, 12, 4)
-FIELD(MVFR0, FPDIVIDE, 16, 4)
-FIELD(MVFR0, FPSQRT, 20, 4)
-FIELD(MVFR0, FPSHVEC, 24, 4)
-FIELD(MVFR0, FPROUND, 28, 4)
-
-FIELD(MVFR1, FPFTZ, 0, 4)
-FIELD(MVFR1, FPDNAN, 4, 4)
-FIELD(MVFR1, SIMDLS, 8, 4) /* A-profile only */
-FIELD(MVFR1, SIMDINT, 12, 4) /* A-profile only */
-FIELD(MVFR1, SIMDSP, 16, 4) /* A-profile only */
-FIELD(MVFR1, SIMDHP, 20, 4) /* A-profile only */
-FIELD(MVFR1, MVE, 8, 4) /* M-profile only */
-FIELD(MVFR1, FP16, 20, 4) /* M-profile only */
-FIELD(MVFR1, FPHP, 24, 4)
-FIELD(MVFR1, SIMDFMAC, 28, 4)
-
-FIELD(MVFR2, SIMDMISC, 0, 4)
-FIELD(MVFR2, FPMISC, 4, 4)
-
FIELD(GPCCR, PPS, 0, 3)
FIELD(GPCCR, IRGN, 8, 2)
FIELD(GPCCR, ORGN, 10, 2)
@@ -3065,8 +2655,7 @@ FIELD(TBFLAG_A64, ATA0, 31, 1)
FIELD(TBFLAG_A64, NV, 32, 1)
FIELD(TBFLAG_A64, NV1, 33, 1)
FIELD(TBFLAG_A64, NV2, 34, 1)
-/* Set if FEAT_NV2 RAM accesses use the EL2&0 translation regime */
-FIELD(TBFLAG_A64, NV2_MEM_E20, 35, 1)
+FIELD(TBFLAG_A64, E2H, 35, 1)
/* Set if FEAT_NV2 RAM accesses are big-endian */
FIELD(TBFLAG_A64, NV2_MEM_BE, 36, 1)
FIELD(TBFLAG_A64, AH, 37, 1) /* FPCR.AH */
diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c
index 2d331ff..8d2229f 100644
--- a/target/arm/gdbstub.c
+++ b/target/arm/gdbstub.c
@@ -247,10 +247,20 @@ static int arm_gdb_get_sysreg(CPUState *cs, GByteArray *buf, int reg)
key = cpu->dyn_sysreg_feature.data.cpregs.keys[reg];
ri = get_arm_cp_reginfo(cpu->cp_regs, key);
if (ri) {
- if (cpreg_field_is_64bit(ri)) {
+ switch (cpreg_field_type(ri)) {
+ case MO_64:
+ if (ri->vhe_redir_to_el2 &&
+ (arm_hcr_el2_eff(env) & HCR_E2H) &&
+ arm_current_el(env) == 2) {
+ ri = get_arm_cp_reginfo(cpu->cp_regs, ri->vhe_redir_to_el2);
+ } else if (ri->vhe_redir_to_el01) {
+ ri = get_arm_cp_reginfo(cpu->cp_regs, ri->vhe_redir_to_el01);
+ }
return gdb_get_reg64(buf, (uint64_t)read_raw_cp_reg(env, ri));
- } else {
+ case MO_32:
return gdb_get_reg32(buf, (uint32_t)read_raw_cp_reg(env, ri));
+ default:
+ g_assert_not_reached();
}
}
return 0;
diff --git a/target/arm/helper.c b/target/arm/helper.c
index c442947..aa730ad 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -40,26 +40,57 @@
static void switch_mode(CPUARMState *env, int mode);
+int compare_u64(const void *a, const void *b)
+{
+ if (*(uint64_t *)a > *(uint64_t *)b) {
+ return 1;
+ }
+ if (*(uint64_t *)a < *(uint64_t *)b) {
+ return -1;
+ }
+ return 0;
+}
+
+/*
+ * Macros which are lvalues for the field in CPUARMState for the
+ * ARMCPRegInfo *ri.
+ */
+#define CPREG_FIELD32(env, ri) \
+ (*(uint32_t *)((char *)(env) + (ri)->fieldoffset))
+#define CPREG_FIELD64(env, ri) \
+ (*(uint64_t *)((char *)(env) + (ri)->fieldoffset))
+
uint64_t raw_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
assert(ri->fieldoffset);
- if (cpreg_field_is_64bit(ri)) {
+ switch (cpreg_field_type(ri)) {
+ case MO_64:
return CPREG_FIELD64(env, ri);
- } else {
+ case MO_32:
return CPREG_FIELD32(env, ri);
+ default:
+ g_assert_not_reached();
}
}
void raw_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
{
assert(ri->fieldoffset);
- if (cpreg_field_is_64bit(ri)) {
+ switch (cpreg_field_type(ri)) {
+ case MO_64:
CPREG_FIELD64(env, ri) = value;
- } else {
+ break;
+ case MO_32:
CPREG_FIELD32(env, ri) = value;
+ break;
+ default:
+ g_assert_not_reached();
}
}
+#undef CPREG_FIELD32
+#undef CPREG_FIELD64
+
static void *raw_ptr(CPUARMState *env, const ARMCPRegInfo *ri)
{
return (char *)env + ri->fieldoffset;
@@ -198,11 +229,11 @@ bool write_list_to_cpustate(ARMCPU *cpu)
return ok;
}
-static void add_cpreg_to_list(gpointer key, gpointer opaque)
+static void add_cpreg_to_list(gpointer key, gpointer value, gpointer opaque)
{
ARMCPU *cpu = opaque;
uint32_t regidx = (uintptr_t)key;
- const ARMCPRegInfo *ri = get_arm_cp_reginfo(cpu->cp_regs, regidx);
+ const ARMCPRegInfo *ri = value;
if (!(ri->type & (ARM_CP_NO_RAW | ARM_CP_ALIAS))) {
cpu->cpreg_indexes[cpu->cpreg_array_len] = cpreg_to_kvm_id(regidx);
@@ -211,61 +242,49 @@ static void add_cpreg_to_list(gpointer key, gpointer opaque)
}
}
-static void count_cpreg(gpointer key, gpointer opaque)
+static void count_cpreg(gpointer key, gpointer value, gpointer opaque)
{
ARMCPU *cpu = opaque;
- const ARMCPRegInfo *ri;
-
- ri = g_hash_table_lookup(cpu->cp_regs, key);
+ const ARMCPRegInfo *ri = value;
if (!(ri->type & (ARM_CP_NO_RAW | ARM_CP_ALIAS))) {
cpu->cpreg_array_len++;
}
}
-static gint cpreg_key_compare(gconstpointer a, gconstpointer b, gpointer d)
-{
- uint64_t aidx = cpreg_to_kvm_id((uintptr_t)a);
- uint64_t bidx = cpreg_to_kvm_id((uintptr_t)b);
-
- if (aidx > bidx) {
- return 1;
- }
- if (aidx < bidx) {
- return -1;
- }
- return 0;
-}
-
void init_cpreg_list(ARMCPU *cpu)
{
/*
* Initialise the cpreg_tuples[] array based on the cp_regs hash.
* Note that we require cpreg_tuples[] to be sorted by key ID.
*/
- GList *keys;
int arraylen;
- keys = g_hash_table_get_keys(cpu->cp_regs);
- keys = g_list_sort_with_data(keys, cpreg_key_compare, NULL);
-
cpu->cpreg_array_len = 0;
-
- g_list_foreach(keys, count_cpreg, cpu);
+ g_hash_table_foreach(cpu->cp_regs, count_cpreg, cpu);
arraylen = cpu->cpreg_array_len;
- cpu->cpreg_indexes = g_new(uint64_t, arraylen);
- cpu->cpreg_values = g_new(uint64_t, arraylen);
- cpu->cpreg_vmstate_indexes = g_new(uint64_t, arraylen);
- cpu->cpreg_vmstate_values = g_new(uint64_t, arraylen);
- cpu->cpreg_vmstate_array_len = cpu->cpreg_array_len;
+ if (arraylen) {
+ cpu->cpreg_indexes = g_new(uint64_t, arraylen);
+ cpu->cpreg_values = g_new(uint64_t, arraylen);
+ cpu->cpreg_vmstate_indexes = g_new(uint64_t, arraylen);
+ cpu->cpreg_vmstate_values = g_new(uint64_t, arraylen);
+ } else {
+ cpu->cpreg_indexes = NULL;
+ cpu->cpreg_values = NULL;
+ cpu->cpreg_vmstate_indexes = NULL;
+ cpu->cpreg_vmstate_values = NULL;
+ }
+ cpu->cpreg_vmstate_array_len = arraylen;
cpu->cpreg_array_len = 0;
- g_list_foreach(keys, add_cpreg_to_list, cpu);
+ g_hash_table_foreach(cpu->cp_regs, add_cpreg_to_list, cpu);
assert(cpu->cpreg_array_len == arraylen);
- g_list_free(keys);
+ if (arraylen) {
+ qsort(cpu->cpreg_indexes, arraylen, sizeof(uint64_t), compare_u64);
+ }
}
bool arm_pan_enabled(CPUARMState *env)
@@ -435,6 +454,8 @@ static const ARMCPRegInfo cp_reginfo[] = {
.access = PL1_RW, .accessfn = access_tvm_trvm,
.fgt = FGT_CONTEXTIDR_EL1,
.nv2_redirect_offset = 0x108 | NV2_REDIR_NV1,
+ .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 13, 0, 1),
+ .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 13, 0, 1),
.secure = ARM_CP_SECSTATE_NS,
.fieldoffset = offsetof(CPUARMState, cp15.contextidr_el[1]),
.resetvalue = 0, .writefn = contextidr_write, .raw_writefn = raw_write, },
@@ -652,9 +673,11 @@ static const ARMCPRegInfo v6_cp_reginfo[] = {
*/
{ .name = "WFAR", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 1,
.access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, },
- { .name = "CPACR", .state = ARM_CP_STATE_BOTH, .opc0 = 3,
+ { .name = "CPACR_EL1", .state = ARM_CP_STATE_BOTH, .opc0 = 3,
.crn = 1, .crm = 0, .opc1 = 0, .opc2 = 2, .accessfn = cpacr_access,
.fgt = FGT_CPACR_EL1,
+ .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 1, 1, 2),
+ .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 1, 0, 2),
.nv2_redirect_offset = 0x100 | NV2_REDIR_NV1,
.access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.cpacr_el1),
.resetfn = cpacr_reset, .writefn = cpacr_write, .readfn = cpacr_read },
@@ -937,12 +960,16 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
.access = PL1_RW, .accessfn = access_tvm_trvm,
.fgt = FGT_AFSR0_EL1,
.nv2_redirect_offset = 0x128 | NV2_REDIR_NV1,
+ .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 5, 1, 0),
+ .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 5, 1, 0),
.type = ARM_CP_CONST, .resetvalue = 0 },
{ .name = "AFSR1_EL1", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 5, .crm = 1, .opc2 = 1,
.access = PL1_RW, .accessfn = access_tvm_trvm,
.fgt = FGT_AFSR1_EL1,
.nv2_redirect_offset = 0x130 | NV2_REDIR_NV1,
+ .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 5, 1, 1),
+ .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 5, 1, 1),
.type = ARM_CP_CONST, .resetvalue = 0 },
/*
* MAIR can just read-as-written because we don't implement caches
@@ -953,6 +980,8 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
.access = PL1_RW, .accessfn = access_tvm_trvm,
.fgt = FGT_MAIR_EL1,
.nv2_redirect_offset = 0x140 | NV2_REDIR_NV1,
+ .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 10, 2, 0),
+ .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 10, 2, 0),
.fieldoffset = offsetof(CPUARMState, cp15.mair_el[1]),
.resetvalue = 0 },
{ .name = "MAIR_EL3", .state = ARM_CP_STATE_AA64,
@@ -1062,7 +1091,7 @@ static const ARMCPRegInfo v6k_cp_reginfo[] = {
.resetvalue = 0 },
};
-static void arm_gt_cntfrq_reset(CPUARMState *env, const ARMCPRegInfo *opaque)
+static void arm_gt_cntfrq_reset(CPUARMState *env, const ARMCPRegInfo *ri)
{
ARMCPU *cpu = env_archcpu(env);
@@ -1999,9 +2028,11 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
.resetfn = arm_gt_cntfrq_reset,
},
/* overall control: mostly access permissions */
- { .name = "CNTKCTL", .state = ARM_CP_STATE_BOTH,
+ { .name = "CNTKCTL_EL1", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 14, .crm = 1, .opc2 = 0,
.access = PL1_RW,
+ .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 14, 1, 0),
+ .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 14, 1, 0),
.fieldoffset = offsetof(CPUARMState, cp15.c14_cntkctl),
.resetvalue = 0,
},
@@ -2731,7 +2762,7 @@ static void vmsa_ttbr_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
/* If the ASID changes (with a 64-bit write), we must flush the TLB. */
- if (cpreg_field_is_64bit(ri) &&
+ if (cpreg_field_type(ri) == MO_64 &&
extract64(raw_read(env, ri) ^ value, 48, 16) != 0) {
ARMCPU *cpu = env_archcpu(env);
tlb_flush(CPU(cpu));
@@ -2792,6 +2823,8 @@ static const ARMCPRegInfo vmsa_pmsa_cp_reginfo[] = {
.access = PL1_RW, .accessfn = access_tvm_trvm,
.fgt = FGT_FAR_EL1,
.nv2_redirect_offset = 0x220 | NV2_REDIR_NV1,
+ .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 6, 0, 0),
+ .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 6, 0, 0),
.fieldoffset = offsetof(CPUARMState, cp15.far_el[1]),
.resetvalue = 0, },
};
@@ -2802,12 +2835,16 @@ static const ARMCPRegInfo vmsa_cp_reginfo[] = {
.access = PL1_RW, .accessfn = access_tvm_trvm,
.fgt = FGT_ESR_EL1,
.nv2_redirect_offset = 0x138 | NV2_REDIR_NV1,
+ .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 5, 2, 0),
+ .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 5, 2, 0),
.fieldoffset = offsetof(CPUARMState, cp15.esr_el[1]), .resetvalue = 0, },
{ .name = "TTBR0_EL1", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 2, .crm = 0, .opc2 = 0,
.access = PL1_RW, .accessfn = access_tvm_trvm,
.fgt = FGT_TTBR0_EL1,
.nv2_redirect_offset = 0x200 | NV2_REDIR_NV1,
+ .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 2, 0, 0),
+ .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 2, 0, 0),
.writefn = vmsa_ttbr_write, .resetvalue = 0, .raw_writefn = raw_write,
.bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr0_s),
offsetof(CPUARMState, cp15.ttbr0_ns) } },
@@ -2816,6 +2853,8 @@ static const ARMCPRegInfo vmsa_cp_reginfo[] = {
.access = PL1_RW, .accessfn = access_tvm_trvm,
.fgt = FGT_TTBR1_EL1,
.nv2_redirect_offset = 0x210 | NV2_REDIR_NV1,
+ .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 2, 0, 1),
+ .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 2, 0, 1),
.writefn = vmsa_ttbr_write, .resetvalue = 0, .raw_writefn = raw_write,
.bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr1_s),
offsetof(CPUARMState, cp15.ttbr1_ns) } },
@@ -2824,6 +2863,8 @@ static const ARMCPRegInfo vmsa_cp_reginfo[] = {
.access = PL1_RW, .accessfn = access_tvm_trvm,
.fgt = FGT_TCR_EL1,
.nv2_redirect_offset = 0x120 | NV2_REDIR_NV1,
+ .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 2, 0, 2),
+ .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 2, 0, 2),
.writefn = vmsa_tcr_el12_write,
.raw_writefn = raw_write,
.resetvalue = 0,
@@ -3029,12 +3070,14 @@ static uint64_t mpidr_read(CPUARMState *env, const ARMCPRegInfo *ri)
}
static const ARMCPRegInfo lpae_cp_reginfo[] = {
- /* NOP AMAIR0/1 */
- { .name = "AMAIR0", .state = ARM_CP_STATE_BOTH,
+ /* AMAIR0 is mapped to AMAIR_EL1[31:0] */
+ { .name = "AMAIR_EL1", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .crn = 10, .crm = 3, .opc1 = 0, .opc2 = 0,
.access = PL1_RW, .accessfn = access_tvm_trvm,
.fgt = FGT_AMAIR_EL1,
.nv2_redirect_offset = 0x148 | NV2_REDIR_NV1,
+ .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 10, 3, 0),
+ .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 10, 3, 0),
.type = ARM_CP_CONST, .resetvalue = 0 },
/* AMAIR1 is mapped to AMAIR_EL1[63:32] */
{ .name = "AMAIR1", .cp = 15, .crn = 10, .crm = 3, .opc1 = 0, .opc2 = 1,
@@ -3550,12 +3593,16 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
.opc0 = 3, .opc1 = 0, .crn = 4, .crm = 0, .opc2 = 1,
.access = PL1_RW, .accessfn = access_nv1,
.nv2_redirect_offset = 0x230 | NV2_REDIR_NV1,
+ .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 4, 0, 1),
+ .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 4, 0, 1),
.fieldoffset = offsetof(CPUARMState, elr_el[1]) },
{ .name = "SPSR_EL1", .state = ARM_CP_STATE_AA64,
.type = ARM_CP_ALIAS,
.opc0 = 3, .opc1 = 0, .crn = 4, .crm = 0, .opc2 = 0,
.access = PL1_RW, .accessfn = access_nv1,
.nv2_redirect_offset = 0x160 | NV2_REDIR_NV1,
+ .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 4, 0, 0),
+ .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 4, 0, 0),
.fieldoffset = offsetof(CPUARMState, banked_spsr[BANK_SVC]) },
/*
* We rely on the access checks not allowing the guest to write to the
@@ -4398,234 +4445,6 @@ static CPAccessResult access_el1nvvct(CPUARMState *env, const ARMCPRegInfo *ri,
return e2h_access(env, ri, isread);
}
-/* Test if system register redirection is to occur in the current state. */
-static bool redirect_for_e2h(CPUARMState *env)
-{
- return arm_current_el(env) == 2 && (arm_hcr_el2_eff(env) & HCR_E2H);
-}
-
-static uint64_t el2_e2h_read(CPUARMState *env, const ARMCPRegInfo *ri)
-{
- CPReadFn *readfn;
-
- if (redirect_for_e2h(env)) {
- /* Switch to the saved EL2 version of the register. */
- ri = ri->opaque;
- readfn = ri->readfn;
- } else {
- readfn = ri->orig_readfn;
- }
- if (readfn == NULL) {
- readfn = raw_read;
- }
- return readfn(env, ri);
-}
-
-static void el2_e2h_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- CPWriteFn *writefn;
-
- if (redirect_for_e2h(env)) {
- /* Switch to the saved EL2 version of the register. */
- ri = ri->opaque;
- writefn = ri->writefn;
- } else {
- writefn = ri->orig_writefn;
- }
- if (writefn == NULL) {
- writefn = raw_write;
- }
- writefn(env, ri, value);
-}
-
-static uint64_t el2_e2h_e12_read(CPUARMState *env, const ARMCPRegInfo *ri)
-{
- /* Pass the EL1 register accessor its ri, not the EL12 alias ri */
- return ri->orig_readfn(env, ri->opaque);
-}
-
-static void el2_e2h_e12_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- /* Pass the EL1 register accessor its ri, not the EL12 alias ri */
- return ri->orig_writefn(env, ri->opaque, value);
-}
-
-static CPAccessResult el2_e2h_e12_access(CPUARMState *env,
- const ARMCPRegInfo *ri,
- bool isread)
-{
- if (arm_current_el(env) == 1) {
- /*
- * This must be a FEAT_NV access (will either trap or redirect
- * to memory). None of the registers with _EL12 aliases want to
- * apply their trap controls for this kind of access, so don't
- * call the orig_accessfn or do the "UNDEF when E2H is 0" check.
- */
- return CP_ACCESS_OK;
- }
- /* FOO_EL12 aliases only exist when E2H is 1; otherwise they UNDEF */
- if (!(arm_hcr_el2_eff(env) & HCR_E2H)) {
- return CP_ACCESS_UNDEFINED;
- }
- if (ri->orig_accessfn) {
- return ri->orig_accessfn(env, ri->opaque, isread);
- }
- return CP_ACCESS_OK;
-}
-
-static void define_arm_vh_e2h_redirects_aliases(ARMCPU *cpu)
-{
- struct E2HAlias {
- uint32_t src_key, dst_key, new_key;
- const char *src_name, *dst_name, *new_name;
- bool (*feature)(const ARMISARegisters *id);
- };
-
-#define K(op0, op1, crn, crm, op2) \
- ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, crn, crm, op0, op1, op2)
-
- static const struct E2HAlias aliases[] = {
- { K(3, 0, 1, 0, 0), K(3, 4, 1, 0, 0), K(3, 5, 1, 0, 0),
- "SCTLR", "SCTLR_EL2", "SCTLR_EL12" },
- { K(3, 0, 1, 0, 3), K(3, 4, 1, 0, 3), K(3, 5, 1, 0, 3),
- "SCTLR2_EL1", "SCTLR2_EL2", "SCTLR2_EL12", isar_feature_aa64_sctlr2 },
- { K(3, 0, 1, 0, 2), K(3, 4, 1, 1, 2), K(3, 5, 1, 0, 2),
- "CPACR", "CPTR_EL2", "CPACR_EL12" },
- { K(3, 0, 2, 0, 0), K(3, 4, 2, 0, 0), K(3, 5, 2, 0, 0),
- "TTBR0_EL1", "TTBR0_EL2", "TTBR0_EL12" },
- { K(3, 0, 2, 0, 1), K(3, 4, 2, 0, 1), K(3, 5, 2, 0, 1),
- "TTBR1_EL1", "TTBR1_EL2", "TTBR1_EL12" },
- { K(3, 0, 2, 0, 2), K(3, 4, 2, 0, 2), K(3, 5, 2, 0, 2),
- "TCR_EL1", "TCR_EL2", "TCR_EL12" },
- { K(3, 0, 2, 0, 3), K(3, 4, 2, 0, 3), K(3, 5, 2, 0, 3),
- "TCR2_EL1", "TCR2_EL2", "TCR2_EL12", isar_feature_aa64_tcr2 },
- { K(3, 0, 4, 0, 0), K(3, 4, 4, 0, 0), K(3, 5, 4, 0, 0),
- "SPSR_EL1", "SPSR_EL2", "SPSR_EL12" },
- { K(3, 0, 4, 0, 1), K(3, 4, 4, 0, 1), K(3, 5, 4, 0, 1),
- "ELR_EL1", "ELR_EL2", "ELR_EL12" },
- { K(3, 0, 5, 1, 0), K(3, 4, 5, 1, 0), K(3, 5, 5, 1, 0),
- "AFSR0_EL1", "AFSR0_EL2", "AFSR0_EL12" },
- { K(3, 0, 5, 1, 1), K(3, 4, 5, 1, 1), K(3, 5, 5, 1, 1),
- "AFSR1_EL1", "AFSR1_EL2", "AFSR1_EL12" },
- { K(3, 0, 5, 2, 0), K(3, 4, 5, 2, 0), K(3, 5, 5, 2, 0),
- "ESR_EL1", "ESR_EL2", "ESR_EL12" },
- { K(3, 0, 6, 0, 0), K(3, 4, 6, 0, 0), K(3, 5, 6, 0, 0),
- "FAR_EL1", "FAR_EL2", "FAR_EL12" },
- { K(3, 0, 10, 2, 0), K(3, 4, 10, 2, 0), K(3, 5, 10, 2, 0),
- "MAIR_EL1", "MAIR_EL2", "MAIR_EL12" },
- { K(3, 0, 10, 3, 0), K(3, 4, 10, 3, 0), K(3, 5, 10, 3, 0),
- "AMAIR0", "AMAIR_EL2", "AMAIR_EL12" },
- { K(3, 0, 12, 0, 0), K(3, 4, 12, 0, 0), K(3, 5, 12, 0, 0),
- "VBAR", "VBAR_EL2", "VBAR_EL12" },
- { K(3, 0, 13, 0, 1), K(3, 4, 13, 0, 1), K(3, 5, 13, 0, 1),
- "CONTEXTIDR_EL1", "CONTEXTIDR_EL2", "CONTEXTIDR_EL12" },
- { K(3, 0, 14, 1, 0), K(3, 4, 14, 1, 0), K(3, 5, 14, 1, 0),
- "CNTKCTL", "CNTHCTL_EL2", "CNTKCTL_EL12" },
-
- { K(3, 0, 1, 2, 0), K(3, 4, 1, 2, 0), K(3, 5, 1, 2, 0),
- "ZCR_EL1", "ZCR_EL2", "ZCR_EL12", isar_feature_aa64_sve },
- { K(3, 0, 1, 2, 6), K(3, 4, 1, 2, 6), K(3, 5, 1, 2, 6),
- "SMCR_EL1", "SMCR_EL2", "SMCR_EL12", isar_feature_aa64_sme },
-
- { K(3, 0, 5, 6, 0), K(3, 4, 5, 6, 0), K(3, 5, 5, 6, 0),
- "TFSR_EL1", "TFSR_EL2", "TFSR_EL12", isar_feature_aa64_mte },
-
- { K(3, 0, 13, 0, 7), K(3, 4, 13, 0, 7), K(3, 5, 13, 0, 7),
- "SCXTNUM_EL1", "SCXTNUM_EL2", "SCXTNUM_EL12",
- isar_feature_aa64_scxtnum },
-
- /* TODO: ARMv8.2-SPE -- PMSCR_EL2 */
- /* TODO: ARMv8.4-Trace -- TRFCR_EL2 */
- };
-#undef K
-
- size_t i;
-
- for (i = 0; i < ARRAY_SIZE(aliases); i++) {
- const struct E2HAlias *a = &aliases[i];
- ARMCPRegInfo *src_reg, *dst_reg, *new_reg;
- bool ok;
-
- if (a->feature && !a->feature(&cpu->isar)) {
- continue;
- }
-
- src_reg = g_hash_table_lookup(cpu->cp_regs,
- (gpointer)(uintptr_t)a->src_key);
- dst_reg = g_hash_table_lookup(cpu->cp_regs,
- (gpointer)(uintptr_t)a->dst_key);
- g_assert(src_reg != NULL);
- g_assert(dst_reg != NULL);
-
- /* Cross-compare names to detect typos in the keys. */
- g_assert(strcmp(src_reg->name, a->src_name) == 0);
- g_assert(strcmp(dst_reg->name, a->dst_name) == 0);
-
- /* None of the core system registers use opaque; we will. */
- g_assert(src_reg->opaque == NULL);
-
- /* Create alias before redirection so we dup the right data. */
- new_reg = g_memdup(src_reg, sizeof(ARMCPRegInfo));
-
- new_reg->name = a->new_name;
- new_reg->type |= ARM_CP_ALIAS;
- /* Remove PL1/PL0 access, leaving PL2/PL3 R/W in place. */
- new_reg->access &= PL2_RW | PL3_RW;
- /* The new_reg op fields are as per new_key, not the target reg */
- new_reg->crn = (a->new_key & CP_REG_ARM64_SYSREG_CRN_MASK)
- >> CP_REG_ARM64_SYSREG_CRN_SHIFT;
- new_reg->crm = (a->new_key & CP_REG_ARM64_SYSREG_CRM_MASK)
- >> CP_REG_ARM64_SYSREG_CRM_SHIFT;
- new_reg->opc0 = (a->new_key & CP_REG_ARM64_SYSREG_OP0_MASK)
- >> CP_REG_ARM64_SYSREG_OP0_SHIFT;
- new_reg->opc1 = (a->new_key & CP_REG_ARM64_SYSREG_OP1_MASK)
- >> CP_REG_ARM64_SYSREG_OP1_SHIFT;
- new_reg->opc2 = (a->new_key & CP_REG_ARM64_SYSREG_OP2_MASK)
- >> CP_REG_ARM64_SYSREG_OP2_SHIFT;
- new_reg->opaque = src_reg;
- new_reg->orig_readfn = src_reg->readfn ?: raw_read;
- new_reg->orig_writefn = src_reg->writefn ?: raw_write;
- new_reg->orig_accessfn = src_reg->accessfn;
- if (!new_reg->raw_readfn) {
- new_reg->raw_readfn = raw_read;
- }
- if (!new_reg->raw_writefn) {
- new_reg->raw_writefn = raw_write;
- }
- new_reg->readfn = el2_e2h_e12_read;
- new_reg->writefn = el2_e2h_e12_write;
- new_reg->accessfn = el2_e2h_e12_access;
-
- /*
- * If the _EL1 register is redirected to memory by FEAT_NV2,
- * then it shares the offset with the _EL12 register,
- * and which one is redirected depends on HCR_EL2.NV1.
- */
- if (new_reg->nv2_redirect_offset) {
- assert(new_reg->nv2_redirect_offset & NV2_REDIR_NV1);
- new_reg->nv2_redirect_offset &= ~NV2_REDIR_NV1;
- new_reg->nv2_redirect_offset |= NV2_REDIR_NO_NV1;
- }
-
- ok = g_hash_table_insert(cpu->cp_regs,
- (gpointer)(uintptr_t)a->new_key, new_reg);
- g_assert(ok);
-
- src_reg->opaque = dst_reg;
- src_reg->orig_readfn = src_reg->readfn ?: raw_read;
- src_reg->orig_writefn = src_reg->writefn ?: raw_write;
- if (!src_reg->raw_readfn) {
- src_reg->raw_readfn = raw_read;
- }
- if (!src_reg->raw_writefn) {
- src_reg->raw_writefn = raw_write;
- }
- src_reg->readfn = el2_e2h_read;
- src_reg->writefn = el2_e2h_write;
- }
-}
#endif
static CPAccessResult ctr_el0_access(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -4918,6 +4737,8 @@ static const ARMCPRegInfo zcr_reginfo[] = {
{ .name = "ZCR_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 1, .crm = 2, .opc2 = 0,
.nv2_redirect_offset = 0x1e0 | NV2_REDIR_NV1,
+ .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 1, 2, 0),
+ .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 1, 2, 0),
.access = PL1_RW, .type = ARM_CP_SVE,
.fieldoffset = offsetof(CPUARMState, vfp.zcr_el[1]),
.writefn = zcr_write, .raw_writefn = raw_write },
@@ -5063,6 +4884,8 @@ static const ARMCPRegInfo sme_reginfo[] = {
{ .name = "SMCR_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 1, .crm = 2, .opc2 = 6,
.nv2_redirect_offset = 0x1f0 | NV2_REDIR_NV1,
+ .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 1, 2, 6),
+ .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 1, 2, 6),
.access = PL1_RW, .type = ARM_CP_SME,
.fieldoffset = offsetof(CPUARMState, vfp.smcr_el[1]),
.writefn = smcr_write, .raw_writefn = raw_write },
@@ -5184,7 +5007,7 @@ static uint64_t id_pfr1_read(CPUARMState *env, const ARMCPRegInfo *ri)
uint64_t pfr1 = GET_IDREG(&cpu->isar, ID_PFR1);
if (env->gicv3state) {
- pfr1 |= 1 << 28;
+ pfr1 = FIELD_DP64(pfr1, ID_PFR1, GIC, 1);
}
return pfr1;
}
@@ -5195,7 +5018,7 @@ static uint64_t id_aa64pfr0_read(CPUARMState *env, const ARMCPRegInfo *ri)
uint64_t pfr0 = GET_IDREG(&cpu->isar, ID_AA64PFR0);
if (env->gicv3state) {
- pfr0 |= 1 << 24;
+ pfr0 = FIELD_DP64(pfr0, ID_AA64PFR0, GIC, 1);
}
return pfr0;
}
@@ -5371,7 +5194,7 @@ static const ARMCPRegInfo rndr_reginfo[] = {
.access = PL0_R, .readfn = rndr_readfn },
};
-static void dccvap_writefn(CPUARMState *env, const ARMCPRegInfo *opaque,
+static void dccvap_writefn(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
#ifdef CONFIG_TCG
@@ -5508,6 +5331,8 @@ static const ARMCPRegInfo mte_reginfo[] = {
.opc0 = 3, .opc1 = 0, .crn = 5, .crm = 6, .opc2 = 0,
.access = PL1_RW, .accessfn = access_tfsr_el1,
.nv2_redirect_offset = 0x190 | NV2_REDIR_NV1,
+ .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 5, 6, 0),
+ .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 5, 6, 0),
.fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[1]) },
{ .name = "TFSR_EL2", .state = ARM_CP_STATE_AA64,
.type = ARM_CP_NV2_REDIRECT,
@@ -5683,6 +5508,8 @@ static const ARMCPRegInfo scxtnum_reginfo[] = {
.access = PL1_RW, .accessfn = access_scxtnum_el1,
.fgt = FGT_SCXTNUM_EL1,
.nv2_redirect_offset = 0x188 | NV2_REDIR_NV1,
+ .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 13, 0, 7),
+ .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 13, 0, 7),
.fieldoffset = offsetof(CPUARMState, scxtnum_el[1]) },
{ .name = "SCXTNUM_EL2", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 4, .crn = 13, .crm = 0, .opc2 = 7,
@@ -6027,6 +5854,8 @@ static const ARMCPRegInfo sctlr2_reginfo[] = {
.opc0 = 3, .opc1 = 0, .opc2 = 3, .crn = 1, .crm = 0,
.access = PL1_RW, .accessfn = sctlr2_el1_access,
.writefn = sctlr2_el1_write, .fgt = FGT_SCTLR_EL1,
+ .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 1, 0, 3),
+ .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 1, 0, 3),
.nv2_redirect_offset = 0x278 | NV2_REDIR_NV1,
.fieldoffset = offsetof(CPUARMState, cp15.sctlr2_el[1]) },
{ .name = "SCTLR2_EL2", .state = ARM_CP_STATE_AA64,
@@ -6087,6 +5916,8 @@ static const ARMCPRegInfo tcr2_reginfo[] = {
.opc0 = 3, .opc1 = 0, .opc2 = 3, .crn = 2, .crm = 0,
.access = PL1_RW, .accessfn = tcr2_el1_access,
.writefn = tcr2_el1_write, .fgt = FGT_TCR_EL1,
+ .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 2, 0, 3),
+ .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 2, 0, 3),
.nv2_redirect_offset = 0x270 | NV2_REDIR_NV1,
.fieldoffset = offsetof(CPUARMState, cp15.tcr2_el[1]) },
{ .name = "TCR2_EL2", .state = ARM_CP_STATE_AA64,
@@ -6278,11 +6109,11 @@ void register_cp_regs_for_features(ARMCPU *cpu)
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa64_tid3,
.resetvalue = GET_IDREG(isar, ID_AA64PFR1)},
- { .name = "ID_AA64PFR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
+ { .name = "ID_AA64PFR2_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 2,
.access = PL1_R, .type = ARM_CP_CONST,
.accessfn = access_aa64_tid3,
- .resetvalue = 0 },
+ .resetvalue = GET_IDREG(isar, ID_AA64PFR2)},
{ .name = "ID_AA64PFR3_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 3,
.access = PL1_R, .type = ARM_CP_CONST,
@@ -6510,6 +6341,8 @@ void register_cp_regs_for_features(ARMCPU *cpu)
R_ID_AA64PFR1_SSBS_MASK |
R_ID_AA64PFR1_MTE_MASK |
R_ID_AA64PFR1_SME_MASK },
+ { .name = "ID_AA64PFR2_EL1",
+ .exported_bits = 0 },
{ .name = "ID_AA64PFR*_EL1_RESERVED",
.is_glob = true },
{ .name = "ID_AA64ZFR0_EL1",
@@ -7177,12 +7010,14 @@ void register_cp_regs_for_features(ARMCPU *cpu)
if (arm_feature(env, ARM_FEATURE_VBAR)) {
static const ARMCPRegInfo vbar_cp_reginfo[] = {
- { .name = "VBAR", .state = ARM_CP_STATE_BOTH,
+ { .name = "VBAR_EL1", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .crn = 12, .crm = 0, .opc1 = 0, .opc2 = 0,
.access = PL1_RW, .writefn = vbar_write,
.accessfn = access_nv1,
.fgt = FGT_VBAR_EL1,
.nv2_redirect_offset = 0x250 | NV2_REDIR_NV1,
+ .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 12, 0, 0),
+ .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 12, 0, 0),
.bank_fieldoffsets = { offsetof(CPUARMState, cp15.vbar_s),
offsetof(CPUARMState, cp15.vbar_ns) },
.resetvalue = 0 },
@@ -7193,10 +7028,12 @@ void register_cp_regs_for_features(ARMCPU *cpu)
/* Generic registers whose values depend on the implementation */
{
ARMCPRegInfo sctlr = {
- .name = "SCTLR", .state = ARM_CP_STATE_BOTH,
+ .name = "SCTLR_EL1", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 0,
.access = PL1_RW, .accessfn = access_tvm_trvm,
.fgt = FGT_SCTLR_EL1,
+ .vhe_redir_to_el2 = ENCODE_AA64_CP_REG(3, 4, 1, 0, 0),
+ .vhe_redir_to_el01 = ENCODE_AA64_CP_REG(3, 5, 1, 0, 0),
.nv2_redirect_offset = 0x110 | NV2_REDIR_NV1,
.bank_fieldoffsets = { offsetof(CPUARMState, cp15.sctlr_s),
offsetof(CPUARMState, cp15.sctlr_ns) },
@@ -7331,61 +7168,40 @@ void register_cp_regs_for_features(ARMCPU *cpu)
}
define_pm_cpregs(cpu);
+}
-#ifndef CONFIG_USER_ONLY
- /*
- * Register redirections and aliases must be done last,
- * after the registers from the other extensions have been defined.
- */
- if (arm_feature(env, ARM_FEATURE_EL2) && cpu_isar_feature(aa64_vh, cpu)) {
- define_arm_vh_e2h_redirects_aliases(cpu);
+/*
+ * Copy an ARMCPRegInfo structure, allocating it along with the name
+ * and an optional suffix to the name.
+ */
+static ARMCPRegInfo *alloc_cpreg(const ARMCPRegInfo *in, const char *suffix)
+{
+ const char *name = in->name;
+ size_t name_len = strlen(name);
+ size_t suff_len = suffix ? strlen(suffix) : 0;
+ ARMCPRegInfo *out = g_malloc(sizeof(*in) + name_len + suff_len + 1);
+ char *p = (char *)(out + 1);
+
+ *out = *in;
+ out->name = p;
+
+ memcpy(p, name, name_len + 1);
+ if (suffix) {
+ memcpy(p + name_len, suffix, suff_len + 1);
}
-#endif
+ return out;
}
/*
- * Private utility function for define_one_arm_cp_reg_with_opaque():
+ * Private utility function for define_one_arm_cp_reg():
* add a single reginfo struct to the hash table.
*/
-static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r,
- void *opaque, CPState state,
- CPSecureState secstate,
- int crm, int opc1, int opc2,
- const char *name)
+static void add_cpreg_to_hashtable(ARMCPU *cpu, ARMCPRegInfo *r,
+ CPState state, CPSecureState secstate,
+ uint32_t key)
{
CPUARMState *env = &cpu->env;
- uint32_t key;
- ARMCPRegInfo *r2;
- bool is64 = r->type & ARM_CP_64BIT;
bool ns = secstate & ARM_CP_SECSTATE_NS;
- int cp = r->cp;
- size_t name_len;
- bool make_const;
-
- switch (state) {
- case ARM_CP_STATE_AA32:
- /* We assume it is a cp15 register if the .cp field is left unset. */
- if (cp == 0 && r->state == ARM_CP_STATE_BOTH) {
- cp = 15;
- }
- key = ENCODE_CP_REG(cp, is64, ns, r->crn, crm, opc1, opc2);
- break;
- case ARM_CP_STATE_AA64:
- /*
- * To allow abbreviation of ARMCPRegInfo definitions, we treat
- * cp == 0 as equivalent to the value for "standard guest-visible
- * sysreg". STATE_BOTH definitions are also always "standard sysreg"
- * in their AArch64 view (the .cp value may be non-zero for the
- * benefit of the AArch32 view).
- */
- if (cp == 0 || r->state == ARM_CP_STATE_BOTH) {
- cp = CP_REG_ARM64_SYSREG_CP;
- }
- key = ENCODE_AA64_CP_REG(cp, r->crn, crm, r->opc0, opc1, opc2);
- break;
- default:
- g_assert_not_reached();
- }
/* Overriding of an existing definition must be explicitly requested. */
if (!(r->type & ARM_CP_OVERRIDE)) {
@@ -7395,84 +7211,7 @@ static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r,
}
}
- /*
- * Eliminate registers that are not present because the EL is missing.
- * Doing this here makes it easier to put all registers for a given
- * feature into the same ARMCPRegInfo array and define them all at once.
- */
- make_const = false;
- if (arm_feature(env, ARM_FEATURE_EL3)) {
- /*
- * An EL2 register without EL2 but with EL3 is (usually) RES0.
- * See rule RJFFP in section D1.1.3 of DDI0487H.a.
- */
- int min_el = ctz32(r->access) / 2;
- if (min_el == 2 && !arm_feature(env, ARM_FEATURE_EL2)) {
- if (r->type & ARM_CP_EL3_NO_EL2_UNDEF) {
- return;
- }
- make_const = !(r->type & ARM_CP_EL3_NO_EL2_KEEP);
- }
- } else {
- CPAccessRights max_el = (arm_feature(env, ARM_FEATURE_EL2)
- ? PL2_RW : PL1_RW);
- if ((r->access & max_el) == 0) {
- return;
- }
- }
-
- /* Combine cpreg and name into one allocation. */
- name_len = strlen(name) + 1;
- r2 = g_malloc(sizeof(*r2) + name_len);
- *r2 = *r;
- r2->name = memcpy(r2 + 1, name, name_len);
-
- /*
- * Update fields to match the instantiation, overwiting wildcards
- * such as CP_ANY, ARM_CP_STATE_BOTH, or ARM_CP_SECSTATE_BOTH.
- */
- r2->cp = cp;
- r2->crm = crm;
- r2->opc1 = opc1;
- r2->opc2 = opc2;
- r2->state = state;
- r2->secure = secstate;
- if (opaque) {
- r2->opaque = opaque;
- }
-
- if (make_const) {
- /* This should not have been a very special register to begin. */
- int old_special = r2->type & ARM_CP_SPECIAL_MASK;
- assert(old_special == 0 || old_special == ARM_CP_NOP);
- /*
- * Set the special function to CONST, retaining the other flags.
- * This is important for e.g. ARM_CP_SVE so that we still
- * take the SVE trap if CPTR_EL3.EZ == 0.
- */
- r2->type = (r2->type & ~ARM_CP_SPECIAL_MASK) | ARM_CP_CONST;
- /*
- * Usually, these registers become RES0, but there are a few
- * special cases like VPIDR_EL2 which have a constant non-zero
- * value with writes ignored.
- */
- if (!(r->type & ARM_CP_EL3_NO_EL2_C_NZ)) {
- r2->resetvalue = 0;
- }
- /*
- * ARM_CP_CONST has precedence, so removing the callbacks and
- * offsets are not strictly necessary, but it is potentially
- * less confusing to debug later.
- */
- r2->readfn = NULL;
- r2->writefn = NULL;
- r2->raw_readfn = NULL;
- r2->raw_writefn = NULL;
- r2->resetfn = NULL;
- r2->fieldoffset = 0;
- r2->bank_fieldoffsets[0] = 0;
- r2->bank_fieldoffsets[1] = 0;
- } else {
+ {
bool isbanked = r->bank_fieldoffsets[0] && r->bank_fieldoffsets[1];
if (isbanked) {
@@ -7481,7 +7220,7 @@ static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r,
* Overwriting fieldoffset as the array is only used to define
* banked registers but later only fieldoffset is used.
*/
- r2->fieldoffset = r->bank_fieldoffsets[ns];
+ r->fieldoffset = r->bank_fieldoffsets[ns];
}
if (state == ARM_CP_STATE_AA32) {
if (isbanked) {
@@ -7498,54 +7237,187 @@ static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r,
*/
if ((r->state == ARM_CP_STATE_BOTH && ns) ||
(arm_feature(env, ARM_FEATURE_V8) && !ns)) {
- r2->type |= ARM_CP_ALIAS;
+ r->type |= ARM_CP_ALIAS;
}
} else if ((secstate != r->secure) && !ns) {
/*
* The register is not banked so we only want to allow
* migration of the non-secure instance.
*/
- r2->type |= ARM_CP_ALIAS;
- }
-
- if (HOST_BIG_ENDIAN &&
- r->state == ARM_CP_STATE_BOTH && r2->fieldoffset) {
- r2->fieldoffset += sizeof(uint32_t);
+ r->type |= ARM_CP_ALIAS;
}
}
}
/*
- * By convention, for wildcarded registers only the first
- * entry is used for migration; the others are marked as
- * ALIAS so we don't try to transfer the register
- * multiple times. Special registers (ie NOP/WFI) are
- * never migratable and not even raw-accessible.
+ * For 32-bit AArch32 regs shared with 64-bit AArch64 regs,
+ * adjust the field offset for endianness. This had to be
+ * delayed until banked registers were resolved.
*/
- if (r2->type & ARM_CP_SPECIAL_MASK) {
- r2->type |= ARM_CP_NO_RAW;
+ if (HOST_BIG_ENDIAN &&
+ state == ARM_CP_STATE_AA32 &&
+ r->state == ARM_CP_STATE_BOTH &&
+ r->fieldoffset) {
+ r->fieldoffset += sizeof(uint32_t);
}
- if (((r->crm == CP_ANY) && crm != 0) ||
- ((r->opc1 == CP_ANY) && opc1 != 0) ||
- ((r->opc2 == CP_ANY) && opc2 != 0)) {
- r2->type |= ARM_CP_ALIAS | ARM_CP_NO_GDB;
+
+ /*
+ * Special registers (ie NOP/WFI) are never migratable and
+ * are not even raw-accessible.
+ */
+ if (r->type & ARM_CP_SPECIAL_MASK) {
+ r->type |= ARM_CP_NO_RAW;
}
/*
+     * Update fields to match the instantiation, overwriting wildcards
+ * such as ARM_CP_STATE_BOTH or ARM_CP_SECSTATE_BOTH.
+ */
+ r->state = state;
+ r->secure = secstate;
+
+ /*
* Check that raw accesses are either forbidden or handled. Note that
* we can't assert this earlier because the setup of fieldoffset for
* banked registers has to be done first.
*/
- if (!(r2->type & ARM_CP_NO_RAW)) {
- assert(!raw_accessors_invalid(r2));
+ if (!(r->type & ARM_CP_NO_RAW)) {
+ assert(!raw_accessors_invalid(r));
}
- g_hash_table_insert(cpu->cp_regs, (gpointer)(uintptr_t)key, r2);
+ g_hash_table_insert(cpu->cp_regs, (gpointer)(uintptr_t)key, r);
+}
+
+static void add_cpreg_to_hashtable_aa32(ARMCPU *cpu, ARMCPRegInfo *r)
+{
+ /*
+ * Under AArch32 CP registers can be common
+ * (same for secure and non-secure world) or banked.
+ */
+ ARMCPRegInfo *r_s;
+ bool is64 = r->type & ARM_CP_64BIT;
+ uint32_t key = ENCODE_CP_REG(r->cp, is64, 0, r->crn,
+ r->crm, r->opc1, r->opc2);
+
+ assert(!(r->type & ARM_CP_ADD_TLBI_NXS)); /* aa64 only */
+ r->vhe_redir_to_el2 = 0;
+ r->vhe_redir_to_el01 = 0;
+
+ switch (r->secure) {
+ case ARM_CP_SECSTATE_NS:
+ key |= CP_REG_AA32_NS_MASK;
+ /* fall through */
+ case ARM_CP_SECSTATE_S:
+ add_cpreg_to_hashtable(cpu, r, ARM_CP_STATE_AA32, r->secure, key);
+ break;
+ case ARM_CP_SECSTATE_BOTH:
+ r_s = alloc_cpreg(r, "_S");
+ add_cpreg_to_hashtable(cpu, r_s, ARM_CP_STATE_AA32,
+ ARM_CP_SECSTATE_S, key);
+
+ key |= CP_REG_AA32_NS_MASK;
+ add_cpreg_to_hashtable(cpu, r, ARM_CP_STATE_AA32,
+ ARM_CP_SECSTATE_NS, key);
+ break;
+ default:
+ g_assert_not_reached();
+ }
}
+static void add_cpreg_to_hashtable_aa64(ARMCPU *cpu, ARMCPRegInfo *r)
+{
+ uint32_t key = ENCODE_AA64_CP_REG(r->opc0, r->opc1,
+ r->crn, r->crm, r->opc2);
+
+ if ((r->type & ARM_CP_ADD_TLBI_NXS) &&
+ cpu_isar_feature(aa64_xs, cpu)) {
+ /*
+ * This is a TLBI insn which has an NXS variant. The
+ * NXS variant is at the same encoding except that
+ * crn is +1, and has the same behaviour except for
+ * fine-grained trapping. Add the NXS insn here and
+ * then fall through to add the normal register.
+ * add_cpreg_to_hashtable() copies the cpreg struct
+ * and name that it is passed, so it's OK to use
+ * a local struct here.
+ */
+ ARMCPRegInfo *nxs_ri = alloc_cpreg(r, "NXS");
+ uint32_t nxs_key;
+
+ assert(nxs_ri->crn < 0xf);
+ nxs_ri->crn++;
+ /* Also increment the CRN field inside the key value */
+ nxs_key = key + (1 << CP_REG_ARM64_SYSREG_CRN_SHIFT);
+ if (nxs_ri->fgt) {
+ nxs_ri->fgt |= R_FGT_NXS_MASK;
+ }
+
+ add_cpreg_to_hashtable(cpu, nxs_ri, ARM_CP_STATE_AA64,
+ ARM_CP_SECSTATE_NS, nxs_key);
+ }
+
+ if (!r->vhe_redir_to_el01) {
+ assert(!r->vhe_redir_to_el2);
+ } else if (!arm_feature(&cpu->env, ARM_FEATURE_EL2) ||
+ !cpu_isar_feature(aa64_vh, cpu)) {
+ r->vhe_redir_to_el2 = 0;
+ r->vhe_redir_to_el01 = 0;
+ } else {
+ /* Create the FOO_EL12 alias. */
+ ARMCPRegInfo *r2 = alloc_cpreg(r, "2");
+ uint32_t key2 = r->vhe_redir_to_el01;
+
+ /*
+ * Clear EL1 redirection on the FOO_EL1 reg;
+ * Clear EL2 redirection on the FOO_EL12 reg;
+ * Install redirection from FOO_EL12 back to FOO_EL1.
+ */
+ r->vhe_redir_to_el01 = 0;
+ r2->vhe_redir_to_el2 = 0;
+ r2->vhe_redir_to_el01 = key;
+
+ r2->type |= ARM_CP_ALIAS | ARM_CP_NO_RAW;
+ /* Remove PL1/PL0 access, leaving PL2/PL3 R/W in place. */
+ r2->access &= PL2_RW | PL3_RW;
+        /* The r2 op fields are as per key2, not the target reg */
+ r2->crn = (key2 & CP_REG_ARM64_SYSREG_CRN_MASK)
+ >> CP_REG_ARM64_SYSREG_CRN_SHIFT;
+ r2->crm = (key2 & CP_REG_ARM64_SYSREG_CRM_MASK)
+ >> CP_REG_ARM64_SYSREG_CRM_SHIFT;
+ r2->opc0 = (key2 & CP_REG_ARM64_SYSREG_OP0_MASK)
+ >> CP_REG_ARM64_SYSREG_OP0_SHIFT;
+ r2->opc1 = (key2 & CP_REG_ARM64_SYSREG_OP1_MASK)
+ >> CP_REG_ARM64_SYSREG_OP1_SHIFT;
+ r2->opc2 = (key2 & CP_REG_ARM64_SYSREG_OP2_MASK)
+ >> CP_REG_ARM64_SYSREG_OP2_SHIFT;
+
+ /* Non-redirected access to this register will abort. */
+ r2->readfn = NULL;
+ r2->writefn = NULL;
+ r2->raw_readfn = NULL;
+ r2->raw_writefn = NULL;
+ r2->accessfn = NULL;
+ r2->fieldoffset = 0;
-void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu,
- const ARMCPRegInfo *r, void *opaque)
+ /*
+ * If the _EL1 register is redirected to memory by FEAT_NV2,
+ * then it shares the offset with the _EL12 register,
+ * and which one is redirected depends on HCR_EL2.NV1.
+ */
+ if (r2->nv2_redirect_offset) {
+ assert(r2->nv2_redirect_offset & NV2_REDIR_NV1);
+ r2->nv2_redirect_offset &= ~NV2_REDIR_NV1;
+ r2->nv2_redirect_offset |= NV2_REDIR_NO_NV1;
+ }
+ add_cpreg_to_hashtable(cpu, r2, ARM_CP_STATE_AA64,
+ ARM_CP_SECSTATE_NS, key2);
+ }
+
+ add_cpreg_to_hashtable(cpu, r, ARM_CP_STATE_AA64,
+ ARM_CP_SECSTATE_NS, key);
+}
+
+void define_one_arm_cp_reg(ARMCPU *cpu, const ARMCPRegInfo *r)
{
/*
* Define implementations of coprocessor registers.
@@ -7571,21 +7443,27 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu,
* bits; the ARM_CP_64BIT* flag applies only to the AArch32 view of
* the register, if any.
*/
- int crm, opc1, opc2;
int crmmin = (r->crm == CP_ANY) ? 0 : r->crm;
int crmmax = (r->crm == CP_ANY) ? 15 : r->crm;
int opc1min = (r->opc1 == CP_ANY) ? 0 : r->opc1;
int opc1max = (r->opc1 == CP_ANY) ? 7 : r->opc1;
int opc2min = (r->opc2 == CP_ANY) ? 0 : r->opc2;
int opc2max = (r->opc2 == CP_ANY) ? 7 : r->opc2;
- CPState state;
+ int cp = r->cp;
+ ARMCPRegInfo r_const;
+ CPUARMState *env = &cpu->env;
- /* 64 bit registers have only CRm and Opc1 fields */
- assert(!((r->type & ARM_CP_64BIT) && (r->opc2 || r->crn)));
+ /*
+ * AArch64 regs are all 64 bit so ARM_CP_64BIT is meaningless.
+ * Moreover, the encoding test just following in general prevents
+ * shared encoding so ARM_CP_STATE_BOTH won't work either.
+ */
+ assert(r->state == ARM_CP_STATE_AA32 || !(r->type & ARM_CP_64BIT));
+ /* AArch32 64-bit registers have only CRm and Opc1 fields. */
+ assert(!(r->type & ARM_CP_64BIT) || !(r->opc2 || r->crn));
/* op0 only exists in the AArch64 encodings */
- assert((r->state != ARM_CP_STATE_AA32) || (r->opc0 == 0));
- /* AArch64 regs are all 64 bit so ARM_CP_64BIT is meaningless */
- assert((r->state != ARM_CP_STATE_AA64) || !(r->type & ARM_CP_64BIT));
+ assert(r->state != ARM_CP_STATE_AA32 || r->opc0 == 0);
+
/*
* This API is only for Arm's system coprocessors (14 and 15) or
* (M-profile or v7A-and-earlier only) for implementation defined
@@ -7596,21 +7474,25 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu,
*/
switch (r->state) {
case ARM_CP_STATE_BOTH:
- /* 0 has a special meaning, but otherwise the same rules as AA32. */
- if (r->cp == 0) {
+ /*
+ * If the cp field is left unset, assume cp15.
+ * Otherwise apply the same rules as AA32.
+ */
+ if (cp == 0) {
+ cp = 15;
break;
}
/* fall through */
case ARM_CP_STATE_AA32:
if (arm_feature(&cpu->env, ARM_FEATURE_V8) &&
!arm_feature(&cpu->env, ARM_FEATURE_M)) {
- assert(r->cp >= 14 && r->cp <= 15);
+ assert(cp >= 14 && cp <= 15);
} else {
- assert(r->cp < 8 || (r->cp >= 14 && r->cp <= 15));
+ assert(cp < 8 || (cp >= 14 && cp <= 15));
}
break;
case ARM_CP_STATE_AA64:
- assert(r->cp == 0 || r->cp == CP_REG_ARM64_SYSREG_CP);
+ assert(cp == 0);
break;
default:
g_assert_not_reached();
@@ -7675,75 +7557,104 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu,
}
}
- for (crm = crmmin; crm <= crmmax; crm++) {
- for (opc1 = opc1min; opc1 <= opc1max; opc1++) {
- for (opc2 = opc2min; opc2 <= opc2max; opc2++) {
- for (state = ARM_CP_STATE_AA32;
- state <= ARM_CP_STATE_AA64; state++) {
- if (r->state != state && r->state != ARM_CP_STATE_BOTH) {
- continue;
- }
- if ((r->type & ARM_CP_ADD_TLBI_NXS) &&
- cpu_isar_feature(aa64_xs, cpu)) {
- /*
- * This is a TLBI insn which has an NXS variant. The
- * NXS variant is at the same encoding except that
- * crn is +1, and has the same behaviour except for
- * fine-grained trapping. Add the NXS insn here and
- * then fall through to add the normal register.
- * add_cpreg_to_hashtable() copies the cpreg struct
- * and name that it is passed, so it's OK to use
- * a local struct here.
- */
- ARMCPRegInfo nxs_ri = *r;
- g_autofree char *name = g_strdup_printf("%sNXS", r->name);
-
- assert(state == ARM_CP_STATE_AA64);
- assert(nxs_ri.crn < 0xf);
- nxs_ri.crn++;
- if (nxs_ri.fgt) {
- nxs_ri.fgt |= R_FGT_NXS_MASK;
- }
- add_cpreg_to_hashtable(cpu, &nxs_ri, opaque, state,
- ARM_CP_SECSTATE_NS,
- crm, opc1, opc2, name);
- }
- if (state == ARM_CP_STATE_AA32) {
- /*
- * Under AArch32 CP registers can be common
- * (same for secure and non-secure world) or banked.
- */
- char *name;
-
- switch (r->secure) {
- case ARM_CP_SECSTATE_S:
- case ARM_CP_SECSTATE_NS:
- add_cpreg_to_hashtable(cpu, r, opaque, state,
- r->secure, crm, opc1, opc2,
- r->name);
- break;
- case ARM_CP_SECSTATE_BOTH:
- name = g_strdup_printf("%s_S", r->name);
- add_cpreg_to_hashtable(cpu, r, opaque, state,
- ARM_CP_SECSTATE_S,
- crm, opc1, opc2, name);
- g_free(name);
- add_cpreg_to_hashtable(cpu, r, opaque, state,
- ARM_CP_SECSTATE_NS,
- crm, opc1, opc2, r->name);
- break;
- default:
- g_assert_not_reached();
- }
- } else {
- /*
- * AArch64 registers get mapped to non-secure instance
- * of AArch32
- */
- add_cpreg_to_hashtable(cpu, r, opaque, state,
- ARM_CP_SECSTATE_NS,
- crm, opc1, opc2, r->name);
- }
+ /*
+ * Eliminate registers that are not present because the EL is missing.
+ * Doing this here makes it easier to put all registers for a given
+ * feature into the same ARMCPRegInfo array and define them all at once.
+ */
+ if (arm_feature(env, ARM_FEATURE_EL3)) {
+ /*
+ * An EL2 register without EL2 but with EL3 is (usually) RES0.
+ * See rule RJFFP in section D1.1.3 of DDI0487H.a.
+ */
+ int min_el = ctz32(r->access) / 2;
+ if (min_el == 2 && !arm_feature(env, ARM_FEATURE_EL2)) {
+ if (r->type & ARM_CP_EL3_NO_EL2_UNDEF) {
+ return;
+ }
+ if (!(r->type & ARM_CP_EL3_NO_EL2_KEEP)) {
+ /* This should not have been a very special register. */
+ int old_special = r->type & ARM_CP_SPECIAL_MASK;
+ assert(old_special == 0 || old_special == ARM_CP_NOP);
+
+ r_const = *r;
+
+ /*
+ * Set the special function to CONST, retaining the other flags.
+ * This is important for e.g. ARM_CP_SVE so that we still
+ * take the SVE trap if CPTR_EL3.EZ == 0.
+ */
+ r_const.type = (r->type & ~ARM_CP_SPECIAL_MASK) | ARM_CP_CONST;
+ /*
+ * Usually, these registers become RES0, but there are a few
+ * special cases like VPIDR_EL2 which have a constant non-zero
+ * value with writes ignored.
+ */
+ if (!(r->type & ARM_CP_EL3_NO_EL2_C_NZ)) {
+ r_const.resetvalue = 0;
+ }
+ /*
+ * ARM_CP_CONST has precedence, so removing the callbacks and
+ * offsets are not strictly necessary, but it is potentially
+ * less confusing to debug later.
+ */
+ r_const.readfn = NULL;
+ r_const.writefn = NULL;
+ r_const.raw_readfn = NULL;
+ r_const.raw_writefn = NULL;
+ r_const.resetfn = NULL;
+ r_const.fieldoffset = 0;
+ r_const.bank_fieldoffsets[0] = 0;
+ r_const.bank_fieldoffsets[1] = 0;
+
+ r = &r_const;
+ }
+ }
+ } else {
+ CPAccessRights max_el = (arm_feature(env, ARM_FEATURE_EL2)
+ ? PL2_RW : PL1_RW);
+ if ((r->access & max_el) == 0) {
+ return;
+ }
+ }
+
+ for (int crm = crmmin; crm <= crmmax; crm++) {
+ for (int opc1 = opc1min; opc1 <= opc1max; opc1++) {
+ for (int opc2 = opc2min; opc2 <= opc2max; opc2++) {
+ ARMCPRegInfo *r2 = alloc_cpreg(r, NULL);
+ ARMCPRegInfo *r3;
+
+ /*
+ * By convention, for wildcarded registers only the first
+ * entry is used for migration; the others are marked as
+ * ALIAS so we don't try to transfer the register
+ * multiple times.
+ */
+ if (crm != crmmin || opc1 != opc1min || opc2 != opc2min) {
+ r2->type |= ARM_CP_ALIAS | ARM_CP_NO_GDB;
+ }
+
+ /* Overwrite CP_ANY with the instantiation. */
+ r2->crm = crm;
+ r2->opc1 = opc1;
+ r2->opc2 = opc2;
+
+ switch (r->state) {
+ case ARM_CP_STATE_AA32:
+ add_cpreg_to_hashtable_aa32(cpu, r2);
+ break;
+ case ARM_CP_STATE_AA64:
+ add_cpreg_to_hashtable_aa64(cpu, r2);
+ break;
+ case ARM_CP_STATE_BOTH:
+ r3 = alloc_cpreg(r2, NULL);
+ r2->cp = cp;
+ add_cpreg_to_hashtable_aa32(cpu, r2);
+ r3->cp = 0;
+ add_cpreg_to_hashtable_aa64(cpu, r3);
+ break;
+ default:
+ g_assert_not_reached();
}
}
}
@@ -7751,12 +7662,10 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu,
}
/* Define a whole list of registers */
-void define_arm_cp_regs_with_opaque_len(ARMCPU *cpu, const ARMCPRegInfo *regs,
- void *opaque, size_t len)
+void define_arm_cp_regs_len(ARMCPU *cpu, const ARMCPRegInfo *regs, size_t len)
{
- size_t i;
- for (i = 0; i < len; ++i) {
- define_one_arm_cp_reg_with_opaque(cpu, regs + i, opaque);
+ for (size_t i = 0; i < len; ++i) {
+ define_one_arm_cp_reg(cpu, regs + i);
}
}
@@ -7818,7 +7727,7 @@ uint64_t arm_cp_read_zero(CPUARMState *env, const ARMCPRegInfo *ri)
return 0;
}
-void arm_cp_reset_ignore(CPUARMState *env, const ARMCPRegInfo *opaque)
+void arm_cp_reset_ignore(CPUARMState *env, const ARMCPRegInfo *ri)
{
/* Helper coprocessor reset function for do-nothing-on-reset registers */
}
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index b77db99..0658a99 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -152,9 +152,6 @@ void hvf_arm_init_debug(void)
g_array_sized_new(true, true, sizeof(HWWatchpoint), max_hw_wps);
}
-#define HVF_SYSREG(crn, crm, op0, op1, op2) \
- ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, crn, crm, op0, op1, op2)
-
#define SYSREG_OP0_SHIFT 20
#define SYSREG_OP0_MASK 0x3
#define SYSREG_OP0(sysreg) ((sysreg >> SYSREG_OP0_SHIFT) & SYSREG_OP0_MASK)
@@ -397,156 +394,34 @@ static const struct hvf_reg_match hvf_fpreg_match[] = {
{ HV_SIMD_FP_REG_Q31, offsetof(CPUARMState, vfp.zregs[31]) },
};
-struct hvf_sreg_match {
- int reg;
- uint32_t key;
- uint32_t cp_idx;
-};
+/*
+ * QEMU uses KVM system register ids in the migration format.
+ * Conveniently, HVF uses the same encoding of the op* and cr* parameters
+ * within the low 16 bits of the ids. Thus conversion between the
+ * formats is trivial.
+ */
-static struct hvf_sreg_match hvf_sreg_match[] = {
- { HV_SYS_REG_DBGBVR0_EL1, HVF_SYSREG(0, 0, 2, 0, 4) },
- { HV_SYS_REG_DBGBCR0_EL1, HVF_SYSREG(0, 0, 2, 0, 5) },
- { HV_SYS_REG_DBGWVR0_EL1, HVF_SYSREG(0, 0, 2, 0, 6) },
- { HV_SYS_REG_DBGWCR0_EL1, HVF_SYSREG(0, 0, 2, 0, 7) },
-
- { HV_SYS_REG_DBGBVR1_EL1, HVF_SYSREG(0, 1, 2, 0, 4) },
- { HV_SYS_REG_DBGBCR1_EL1, HVF_SYSREG(0, 1, 2, 0, 5) },
- { HV_SYS_REG_DBGWVR1_EL1, HVF_SYSREG(0, 1, 2, 0, 6) },
- { HV_SYS_REG_DBGWCR1_EL1, HVF_SYSREG(0, 1, 2, 0, 7) },
-
- { HV_SYS_REG_DBGBVR2_EL1, HVF_SYSREG(0, 2, 2, 0, 4) },
- { HV_SYS_REG_DBGBCR2_EL1, HVF_SYSREG(0, 2, 2, 0, 5) },
- { HV_SYS_REG_DBGWVR2_EL1, HVF_SYSREG(0, 2, 2, 0, 6) },
- { HV_SYS_REG_DBGWCR2_EL1, HVF_SYSREG(0, 2, 2, 0, 7) },
-
- { HV_SYS_REG_DBGBVR3_EL1, HVF_SYSREG(0, 3, 2, 0, 4) },
- { HV_SYS_REG_DBGBCR3_EL1, HVF_SYSREG(0, 3, 2, 0, 5) },
- { HV_SYS_REG_DBGWVR3_EL1, HVF_SYSREG(0, 3, 2, 0, 6) },
- { HV_SYS_REG_DBGWCR3_EL1, HVF_SYSREG(0, 3, 2, 0, 7) },
-
- { HV_SYS_REG_DBGBVR4_EL1, HVF_SYSREG(0, 4, 2, 0, 4) },
- { HV_SYS_REG_DBGBCR4_EL1, HVF_SYSREG(0, 4, 2, 0, 5) },
- { HV_SYS_REG_DBGWVR4_EL1, HVF_SYSREG(0, 4, 2, 0, 6) },
- { HV_SYS_REG_DBGWCR4_EL1, HVF_SYSREG(0, 4, 2, 0, 7) },
-
- { HV_SYS_REG_DBGBVR5_EL1, HVF_SYSREG(0, 5, 2, 0, 4) },
- { HV_SYS_REG_DBGBCR5_EL1, HVF_SYSREG(0, 5, 2, 0, 5) },
- { HV_SYS_REG_DBGWVR5_EL1, HVF_SYSREG(0, 5, 2, 0, 6) },
- { HV_SYS_REG_DBGWCR5_EL1, HVF_SYSREG(0, 5, 2, 0, 7) },
-
- { HV_SYS_REG_DBGBVR6_EL1, HVF_SYSREG(0, 6, 2, 0, 4) },
- { HV_SYS_REG_DBGBCR6_EL1, HVF_SYSREG(0, 6, 2, 0, 5) },
- { HV_SYS_REG_DBGWVR6_EL1, HVF_SYSREG(0, 6, 2, 0, 6) },
- { HV_SYS_REG_DBGWCR6_EL1, HVF_SYSREG(0, 6, 2, 0, 7) },
-
- { HV_SYS_REG_DBGBVR7_EL1, HVF_SYSREG(0, 7, 2, 0, 4) },
- { HV_SYS_REG_DBGBCR7_EL1, HVF_SYSREG(0, 7, 2, 0, 5) },
- { HV_SYS_REG_DBGWVR7_EL1, HVF_SYSREG(0, 7, 2, 0, 6) },
- { HV_SYS_REG_DBGWCR7_EL1, HVF_SYSREG(0, 7, 2, 0, 7) },
-
- { HV_SYS_REG_DBGBVR8_EL1, HVF_SYSREG(0, 8, 2, 0, 4) },
- { HV_SYS_REG_DBGBCR8_EL1, HVF_SYSREG(0, 8, 2, 0, 5) },
- { HV_SYS_REG_DBGWVR8_EL1, HVF_SYSREG(0, 8, 2, 0, 6) },
- { HV_SYS_REG_DBGWCR8_EL1, HVF_SYSREG(0, 8, 2, 0, 7) },
-
- { HV_SYS_REG_DBGBVR9_EL1, HVF_SYSREG(0, 9, 2, 0, 4) },
- { HV_SYS_REG_DBGBCR9_EL1, HVF_SYSREG(0, 9, 2, 0, 5) },
- { HV_SYS_REG_DBGWVR9_EL1, HVF_SYSREG(0, 9, 2, 0, 6) },
- { HV_SYS_REG_DBGWCR9_EL1, HVF_SYSREG(0, 9, 2, 0, 7) },
-
- { HV_SYS_REG_DBGBVR10_EL1, HVF_SYSREG(0, 10, 2, 0, 4) },
- { HV_SYS_REG_DBGBCR10_EL1, HVF_SYSREG(0, 10, 2, 0, 5) },
- { HV_SYS_REG_DBGWVR10_EL1, HVF_SYSREG(0, 10, 2, 0, 6) },
- { HV_SYS_REG_DBGWCR10_EL1, HVF_SYSREG(0, 10, 2, 0, 7) },
-
- { HV_SYS_REG_DBGBVR11_EL1, HVF_SYSREG(0, 11, 2, 0, 4) },
- { HV_SYS_REG_DBGBCR11_EL1, HVF_SYSREG(0, 11, 2, 0, 5) },
- { HV_SYS_REG_DBGWVR11_EL1, HVF_SYSREG(0, 11, 2, 0, 6) },
- { HV_SYS_REG_DBGWCR11_EL1, HVF_SYSREG(0, 11, 2, 0, 7) },
-
- { HV_SYS_REG_DBGBVR12_EL1, HVF_SYSREG(0, 12, 2, 0, 4) },
- { HV_SYS_REG_DBGBCR12_EL1, HVF_SYSREG(0, 12, 2, 0, 5) },
- { HV_SYS_REG_DBGWVR12_EL1, HVF_SYSREG(0, 12, 2, 0, 6) },
- { HV_SYS_REG_DBGWCR12_EL1, HVF_SYSREG(0, 12, 2, 0, 7) },
-
- { HV_SYS_REG_DBGBVR13_EL1, HVF_SYSREG(0, 13, 2, 0, 4) },
- { HV_SYS_REG_DBGBCR13_EL1, HVF_SYSREG(0, 13, 2, 0, 5) },
- { HV_SYS_REG_DBGWVR13_EL1, HVF_SYSREG(0, 13, 2, 0, 6) },
- { HV_SYS_REG_DBGWCR13_EL1, HVF_SYSREG(0, 13, 2, 0, 7) },
-
- { HV_SYS_REG_DBGBVR14_EL1, HVF_SYSREG(0, 14, 2, 0, 4) },
- { HV_SYS_REG_DBGBCR14_EL1, HVF_SYSREG(0, 14, 2, 0, 5) },
- { HV_SYS_REG_DBGWVR14_EL1, HVF_SYSREG(0, 14, 2, 0, 6) },
- { HV_SYS_REG_DBGWCR14_EL1, HVF_SYSREG(0, 14, 2, 0, 7) },
-
- { HV_SYS_REG_DBGBVR15_EL1, HVF_SYSREG(0, 15, 2, 0, 4) },
- { HV_SYS_REG_DBGBCR15_EL1, HVF_SYSREG(0, 15, 2, 0, 5) },
- { HV_SYS_REG_DBGWVR15_EL1, HVF_SYSREG(0, 15, 2, 0, 6) },
- { HV_SYS_REG_DBGWCR15_EL1, HVF_SYSREG(0, 15, 2, 0, 7) },
-
-#ifdef SYNC_NO_RAW_REGS
- /*
- * The registers below are manually synced on init because they are
- * marked as NO_RAW. We still list them to make number space sync easier.
- */
- { HV_SYS_REG_MDCCINT_EL1, HVF_SYSREG(0, 2, 2, 0, 0) },
- { HV_SYS_REG_MIDR_EL1, HVF_SYSREG(0, 0, 3, 0, 0) },
- { HV_SYS_REG_MPIDR_EL1, HVF_SYSREG(0, 0, 3, 0, 5) },
- { HV_SYS_REG_ID_AA64PFR0_EL1, HVF_SYSREG(0, 4, 3, 0, 0) },
-#endif
- { HV_SYS_REG_ID_AA64PFR1_EL1, HVF_SYSREG(0, 4, 3, 0, 1) },
- { HV_SYS_REG_ID_AA64DFR0_EL1, HVF_SYSREG(0, 5, 3, 0, 0) },
- { HV_SYS_REG_ID_AA64DFR1_EL1, HVF_SYSREG(0, 5, 3, 0, 1) },
- { HV_SYS_REG_ID_AA64ISAR0_EL1, HVF_SYSREG(0, 6, 3, 0, 0) },
- { HV_SYS_REG_ID_AA64ISAR1_EL1, HVF_SYSREG(0, 6, 3, 0, 1) },
-#ifdef SYNC_NO_MMFR0
- /* We keep the hardware MMFR0 around. HW limits are there anyway */
- { HV_SYS_REG_ID_AA64MMFR0_EL1, HVF_SYSREG(0, 7, 3, 0, 0) },
-#endif
- { HV_SYS_REG_ID_AA64MMFR1_EL1, HVF_SYSREG(0, 7, 3, 0, 1) },
- { HV_SYS_REG_ID_AA64MMFR2_EL1, HVF_SYSREG(0, 7, 3, 0, 2) },
- /* Add ID_AA64MMFR3_EL1 here when HVF supports it */
-
- { HV_SYS_REG_MDSCR_EL1, HVF_SYSREG(0, 2, 2, 0, 2) },
- { HV_SYS_REG_SCTLR_EL1, HVF_SYSREG(1, 0, 3, 0, 0) },
- { HV_SYS_REG_CPACR_EL1, HVF_SYSREG(1, 0, 3, 0, 2) },
- { HV_SYS_REG_TTBR0_EL1, HVF_SYSREG(2, 0, 3, 0, 0) },
- { HV_SYS_REG_TTBR1_EL1, HVF_SYSREG(2, 0, 3, 0, 1) },
- { HV_SYS_REG_TCR_EL1, HVF_SYSREG(2, 0, 3, 0, 2) },
-
- { HV_SYS_REG_APIAKEYLO_EL1, HVF_SYSREG(2, 1, 3, 0, 0) },
- { HV_SYS_REG_APIAKEYHI_EL1, HVF_SYSREG(2, 1, 3, 0, 1) },
- { HV_SYS_REG_APIBKEYLO_EL1, HVF_SYSREG(2, 1, 3, 0, 2) },
- { HV_SYS_REG_APIBKEYHI_EL1, HVF_SYSREG(2, 1, 3, 0, 3) },
- { HV_SYS_REG_APDAKEYLO_EL1, HVF_SYSREG(2, 2, 3, 0, 0) },
- { HV_SYS_REG_APDAKEYHI_EL1, HVF_SYSREG(2, 2, 3, 0, 1) },
- { HV_SYS_REG_APDBKEYLO_EL1, HVF_SYSREG(2, 2, 3, 0, 2) },
- { HV_SYS_REG_APDBKEYHI_EL1, HVF_SYSREG(2, 2, 3, 0, 3) },
- { HV_SYS_REG_APGAKEYLO_EL1, HVF_SYSREG(2, 3, 3, 0, 0) },
- { HV_SYS_REG_APGAKEYHI_EL1, HVF_SYSREG(2, 3, 3, 0, 1) },
-
- { HV_SYS_REG_SPSR_EL1, HVF_SYSREG(4, 0, 3, 0, 0) },
- { HV_SYS_REG_ELR_EL1, HVF_SYSREG(4, 0, 3, 0, 1) },
- { HV_SYS_REG_SP_EL0, HVF_SYSREG(4, 1, 3, 0, 0) },
- { HV_SYS_REG_AFSR0_EL1, HVF_SYSREG(5, 1, 3, 0, 0) },
- { HV_SYS_REG_AFSR1_EL1, HVF_SYSREG(5, 1, 3, 0, 1) },
- { HV_SYS_REG_ESR_EL1, HVF_SYSREG(5, 2, 3, 0, 0) },
- { HV_SYS_REG_FAR_EL1, HVF_SYSREG(6, 0, 3, 0, 0) },
- { HV_SYS_REG_PAR_EL1, HVF_SYSREG(7, 4, 3, 0, 0) },
- { HV_SYS_REG_MAIR_EL1, HVF_SYSREG(10, 2, 3, 0, 0) },
- { HV_SYS_REG_AMAIR_EL1, HVF_SYSREG(10, 3, 3, 0, 0) },
- { HV_SYS_REG_VBAR_EL1, HVF_SYSREG(12, 0, 3, 0, 0) },
- { HV_SYS_REG_CONTEXTIDR_EL1, HVF_SYSREG(13, 0, 3, 0, 1) },
- { HV_SYS_REG_TPIDR_EL1, HVF_SYSREG(13, 0, 3, 0, 4) },
- { HV_SYS_REG_CNTKCTL_EL1, HVF_SYSREG(14, 1, 3, 0, 0) },
- { HV_SYS_REG_CSSELR_EL1, HVF_SYSREG(0, 0, 3, 2, 0) },
- { HV_SYS_REG_TPIDR_EL0, HVF_SYSREG(13, 0, 3, 3, 2) },
- { HV_SYS_REG_TPIDRRO_EL0, HVF_SYSREG(13, 0, 3, 3, 3) },
- { HV_SYS_REG_CNTV_CTL_EL0, HVF_SYSREG(14, 3, 3, 3, 1) },
- { HV_SYS_REG_CNTV_CVAL_EL0, HVF_SYSREG(14, 3, 3, 3, 2) },
- { HV_SYS_REG_SP_EL1, HVF_SYSREG(4, 1, 3, 4, 0) },
+#define KVMID_TO_HVF(KVM) ((KVM) & 0xffff)
+#define HVF_TO_KVMID(HVF) \
+ (CP_REG_ARM64 | CP_REG_SIZE_U64 | CP_REG_ARM64_SYSREG | (HVF))
+
+/* Verify this at compile-time. */
+
+#define DEF_SYSREG(HVF_ID, ...) \
+ QEMU_BUILD_BUG_ON(HVF_ID != KVMID_TO_HVF(KVMID_AA64_SYS_REG64(__VA_ARGS__)));
+
+#include "sysreg.c.inc"
+
+#undef DEF_SYSREG
+
+#define DEF_SYSREG(HVF_ID, op0, op1, crn, crm, op2) HVF_ID,
+
+static const hv_sys_reg_t hvf_sreg_list[] = {
+#include "sysreg.c.inc"
};
+#undef DEF_SYSREG
+
int hvf_get_registers(CPUState *cpu)
{
ARMCPU *arm_cpu = ARM_CPU(cpu);
@@ -554,7 +429,7 @@ int hvf_get_registers(CPUState *cpu)
hv_return_t ret;
uint64_t val;
hv_simd_fp_uchar16_t fpval;
- int i;
+ int i, n;
for (i = 0; i < ARRAY_SIZE(hvf_reg_match); i++) {
ret = hv_vcpu_get_reg(cpu->accel->fd, hvf_reg_match[i].reg, &val);
@@ -583,14 +458,13 @@ int hvf_get_registers(CPUState *cpu)
assert_hvf_ok(ret);
pstate_write(env, val);
- for (i = 0; i < ARRAY_SIZE(hvf_sreg_match); i++) {
- if (hvf_sreg_match[i].cp_idx == -1) {
- continue;
- }
+ for (i = 0, n = arm_cpu->cpreg_array_len; i < n; i++) {
+ uint64_t kvm_id = arm_cpu->cpreg_indexes[i];
+ int hvf_id = KVMID_TO_HVF(kvm_id);
if (cpu->accel->guest_debug_enabled) {
/* Handle debug registers */
- switch (hvf_sreg_match[i].reg) {
+ switch (hvf_id) {
case HV_SYS_REG_DBGBVR0_EL1:
case HV_SYS_REG_DBGBCR0_EL1:
case HV_SYS_REG_DBGWVR0_EL1:
@@ -664,20 +538,22 @@ int hvf_get_registers(CPUState *cpu)
* vCPU but simply keep the values from the previous
* environment.
*/
- const ARMCPRegInfo *ri;
- ri = get_arm_cp_reginfo(arm_cpu->cp_regs, hvf_sreg_match[i].key);
+ uint32_t key = kvm_to_cpreg_id(kvm_id);
+ const ARMCPRegInfo *ri =
+ get_arm_cp_reginfo(arm_cpu->cp_regs, key);
+
val = read_raw_cp_reg(env, ri);
- arm_cpu->cpreg_values[hvf_sreg_match[i].cp_idx] = val;
+ arm_cpu->cpreg_values[i] = val;
continue;
}
}
}
- ret = hv_vcpu_get_sys_reg(cpu->accel->fd, hvf_sreg_match[i].reg, &val);
+ ret = hv_vcpu_get_sys_reg(cpu->accel->fd, hvf_id, &val);
assert_hvf_ok(ret);
- arm_cpu->cpreg_values[hvf_sreg_match[i].cp_idx] = val;
+ arm_cpu->cpreg_values[i] = val;
}
assert(write_list_to_cpustate(arm_cpu));
@@ -693,7 +569,7 @@ int hvf_put_registers(CPUState *cpu)
hv_return_t ret;
uint64_t val;
hv_simd_fp_uchar16_t fpval;
- int i;
+ int i, n;
for (i = 0; i < ARRAY_SIZE(hvf_reg_match); i++) {
val = *(uint64_t *)((void *)env + hvf_reg_match[i].offset);
@@ -720,14 +596,13 @@ int hvf_put_registers(CPUState *cpu)
aarch64_save_sp(env, arm_current_el(env));
assert(write_cpustate_to_list(arm_cpu, false));
- for (i = 0; i < ARRAY_SIZE(hvf_sreg_match); i++) {
- if (hvf_sreg_match[i].cp_idx == -1) {
- continue;
- }
+ for (i = 0, n = arm_cpu->cpreg_array_len; i < n; i++) {
+ uint64_t kvm_id = arm_cpu->cpreg_indexes[i];
+ int hvf_id = KVMID_TO_HVF(kvm_id);
if (cpu->accel->guest_debug_enabled) {
/* Handle debug registers */
- switch (hvf_sreg_match[i].reg) {
+ switch (hvf_id) {
case HV_SYS_REG_DBGBVR0_EL1:
case HV_SYS_REG_DBGBCR0_EL1:
case HV_SYS_REG_DBGWVR0_EL1:
@@ -801,8 +676,8 @@ int hvf_put_registers(CPUState *cpu)
}
}
- val = arm_cpu->cpreg_values[hvf_sreg_match[i].cp_idx];
- ret = hv_vcpu_set_sys_reg(cpu->accel->fd, hvf_sreg_match[i].reg, val);
+ val = arm_cpu->cpreg_values[i];
+ ret = hv_vcpu_set_sys_reg(cpu->accel->fd, hvf_id, val);
assert_hvf_ok(ret);
}
@@ -869,6 +744,7 @@ static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
} regs[] = {
{ HV_SYS_REG_ID_AA64PFR0_EL1, &host_isar.idregs[ID_AA64PFR0_EL1_IDX] },
{ HV_SYS_REG_ID_AA64PFR1_EL1, &host_isar.idregs[ID_AA64PFR1_EL1_IDX] },
+ /* Add ID_AA64PFR2_EL1 here when HVF supports it */
{ HV_SYS_REG_ID_AA64DFR0_EL1, &host_isar.idregs[ID_AA64DFR0_EL1_IDX] },
{ HV_SYS_REG_ID_AA64DFR1_EL1, &host_isar.idregs[ID_AA64DFR1_EL1_IDX] },
{ HV_SYS_REG_ID_AA64ISAR0_EL1, &host_isar.idregs[ID_AA64ISAR0_EL1_IDX] },
@@ -1012,7 +888,7 @@ int hvf_arch_init_vcpu(CPUState *cpu)
{
ARMCPU *arm_cpu = ARM_CPU(cpu);
CPUARMState *env = &arm_cpu->env;
- uint32_t sregs_match_len = ARRAY_SIZE(hvf_sreg_match);
+ uint32_t sregs_match_len = ARRAY_SIZE(hvf_sreg_list);
uint32_t sregs_cnt = 0;
uint64_t pfr;
hv_return_t ret;
@@ -1037,21 +913,22 @@ int hvf_arch_init_vcpu(CPUState *cpu)
/* Populate cp list for all known sysregs */
for (i = 0; i < sregs_match_len; i++) {
- const ARMCPRegInfo *ri;
- uint32_t key = hvf_sreg_match[i].key;
+ hv_sys_reg_t hvf_id = hvf_sreg_list[i];
+ uint64_t kvm_id = HVF_TO_KVMID(hvf_id);
+ uint32_t key = kvm_to_cpreg_id(kvm_id);
+ const ARMCPRegInfo *ri = get_arm_cp_reginfo(arm_cpu->cp_regs, key);
- ri = get_arm_cp_reginfo(arm_cpu->cp_regs, key);
if (ri) {
assert(!(ri->type & ARM_CP_NO_RAW));
- hvf_sreg_match[i].cp_idx = sregs_cnt;
- arm_cpu->cpreg_indexes[sregs_cnt++] = cpreg_to_kvm_id(key);
- } else {
- hvf_sreg_match[i].cp_idx = -1;
+ arm_cpu->cpreg_indexes[sregs_cnt++] = kvm_id;
}
}
arm_cpu->cpreg_array_len = sregs_cnt;
arm_cpu->cpreg_vmstate_array_len = sregs_cnt;
+ /* cpreg tuples must be in strictly ascending order */
+ qsort(arm_cpu->cpreg_indexes, sregs_cnt, sizeof(uint64_t), compare_u64);
+
assert(write_cpustate_to_list(arm_cpu, false));
/* Set CP_NO_RAW system registers on init */
@@ -1248,11 +1125,10 @@ static bool is_id_sysreg(uint32_t reg)
static uint32_t hvf_reg2cp_reg(uint32_t reg)
{
- return ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
+ return ENCODE_AA64_CP_REG((reg >> SYSREG_OP0_SHIFT) & SYSREG_OP0_MASK,
+ (reg >> SYSREG_OP1_SHIFT) & SYSREG_OP1_MASK,
(reg >> SYSREG_CRN_SHIFT) & SYSREG_CRN_MASK,
(reg >> SYSREG_CRM_SHIFT) & SYSREG_CRM_MASK,
- (reg >> SYSREG_OP0_SHIFT) & SYSREG_OP0_MASK,
- (reg >> SYSREG_OP1_SHIFT) & SYSREG_OP1_MASK,
(reg >> SYSREG_OP2_SHIFT) & SYSREG_OP2_MASK);
}
@@ -1277,7 +1153,7 @@ static bool hvf_sysreg_read_cp(CPUState *cpu, uint32_t reg, uint64_t *val)
} else if (ri->readfn) {
*val = ri->readfn(env, ri);
} else {
- *val = CPREG_FIELD64(env, ri);
+ *val = raw_read(env, ri);
}
trace_hvf_vgic_read(ri->name, *val);
return true;
@@ -1559,7 +1435,7 @@ static bool hvf_sysreg_write_cp(CPUState *cpu, uint32_t reg, uint64_t val)
if (ri->writefn) {
ri->writefn(env, ri, val);
} else {
- CPREG_FIELD64(env, ri) = val;
+ raw_write(env, ri, val);
}
trace_hvf_vgic_write(ri->name, val);
diff --git a/target/arm/hvf/sysreg.c.inc b/target/arm/hvf/sysreg.c.inc
new file mode 100644
index 0000000..067a860
--- /dev/null
+++ b/target/arm/hvf/sysreg.c.inc
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+DEF_SYSREG(HV_SYS_REG_DBGBVR0_EL1, 2, 0, 0, 0, 4)
+DEF_SYSREG(HV_SYS_REG_DBGBCR0_EL1, 2, 0, 0, 0, 5)
+DEF_SYSREG(HV_SYS_REG_DBGWVR0_EL1, 2, 0, 0, 0, 6)
+DEF_SYSREG(HV_SYS_REG_DBGWCR0_EL1, 2, 0, 0, 0, 7)
+
+DEF_SYSREG(HV_SYS_REG_DBGBVR1_EL1, 2, 0, 0, 1, 4)
+DEF_SYSREG(HV_SYS_REG_DBGBCR1_EL1, 2, 0, 0, 1, 5)
+DEF_SYSREG(HV_SYS_REG_DBGWVR1_EL1, 2, 0, 0, 1, 6)
+DEF_SYSREG(HV_SYS_REG_DBGWCR1_EL1, 2, 0, 0, 1, 7)
+
+DEF_SYSREG(HV_SYS_REG_DBGBVR2_EL1, 2, 0, 0, 2, 4)
+DEF_SYSREG(HV_SYS_REG_DBGBCR2_EL1, 2, 0, 0, 2, 5)
+DEF_SYSREG(HV_SYS_REG_DBGWVR2_EL1, 2, 0, 0, 2, 6)
+DEF_SYSREG(HV_SYS_REG_DBGWCR2_EL1, 2, 0, 0, 2, 7)
+
+DEF_SYSREG(HV_SYS_REG_DBGBVR3_EL1, 2, 0, 0, 3, 4)
+DEF_SYSREG(HV_SYS_REG_DBGBCR3_EL1, 2, 0, 0, 3, 5)
+DEF_SYSREG(HV_SYS_REG_DBGWVR3_EL1, 2, 0, 0, 3, 6)
+DEF_SYSREG(HV_SYS_REG_DBGWCR3_EL1, 2, 0, 0, 3, 7)
+
+DEF_SYSREG(HV_SYS_REG_DBGBVR4_EL1, 2, 0, 0, 4, 4)
+DEF_SYSREG(HV_SYS_REG_DBGBCR4_EL1, 2, 0, 0, 4, 5)
+DEF_SYSREG(HV_SYS_REG_DBGWVR4_EL1, 2, 0, 0, 4, 6)
+DEF_SYSREG(HV_SYS_REG_DBGWCR4_EL1, 2, 0, 0, 4, 7)
+
+DEF_SYSREG(HV_SYS_REG_DBGBVR5_EL1, 2, 0, 0, 5, 4)
+DEF_SYSREG(HV_SYS_REG_DBGBCR5_EL1, 2, 0, 0, 5, 5)
+DEF_SYSREG(HV_SYS_REG_DBGWVR5_EL1, 2, 0, 0, 5, 6)
+DEF_SYSREG(HV_SYS_REG_DBGWCR5_EL1, 2, 0, 0, 5, 7)
+
+DEF_SYSREG(HV_SYS_REG_DBGBVR6_EL1, 2, 0, 0, 6, 4)
+DEF_SYSREG(HV_SYS_REG_DBGBCR6_EL1, 2, 0, 0, 6, 5)
+DEF_SYSREG(HV_SYS_REG_DBGWVR6_EL1, 2, 0, 0, 6, 6)
+DEF_SYSREG(HV_SYS_REG_DBGWCR6_EL1, 2, 0, 0, 6, 7)
+
+DEF_SYSREG(HV_SYS_REG_DBGBVR7_EL1, 2, 0, 0, 7, 4)
+DEF_SYSREG(HV_SYS_REG_DBGBCR7_EL1, 2, 0, 0, 7, 5)
+DEF_SYSREG(HV_SYS_REG_DBGWVR7_EL1, 2, 0, 0, 7, 6)
+DEF_SYSREG(HV_SYS_REG_DBGWCR7_EL1, 2, 0, 0, 7, 7)
+
+DEF_SYSREG(HV_SYS_REG_DBGBVR8_EL1, 2, 0, 0, 8, 4)
+DEF_SYSREG(HV_SYS_REG_DBGBCR8_EL1, 2, 0, 0, 8, 5)
+DEF_SYSREG(HV_SYS_REG_DBGWVR8_EL1, 2, 0, 0, 8, 6)
+DEF_SYSREG(HV_SYS_REG_DBGWCR8_EL1, 2, 0, 0, 8, 7)
+
+DEF_SYSREG(HV_SYS_REG_DBGBVR9_EL1, 2, 0, 0, 9, 4)
+DEF_SYSREG(HV_SYS_REG_DBGBCR9_EL1, 2, 0, 0, 9, 5)
+DEF_SYSREG(HV_SYS_REG_DBGWVR9_EL1, 2, 0, 0, 9, 6)
+DEF_SYSREG(HV_SYS_REG_DBGWCR9_EL1, 2, 0, 0, 9, 7)
+
+DEF_SYSREG(HV_SYS_REG_DBGBVR10_EL1, 2, 0, 0, 10, 4)
+DEF_SYSREG(HV_SYS_REG_DBGBCR10_EL1, 2, 0, 0, 10, 5)
+DEF_SYSREG(HV_SYS_REG_DBGWVR10_EL1, 2, 0, 0, 10, 6)
+DEF_SYSREG(HV_SYS_REG_DBGWCR10_EL1, 2, 0, 0, 10, 7)
+
+DEF_SYSREG(HV_SYS_REG_DBGBVR11_EL1, 2, 0, 0, 11, 4)
+DEF_SYSREG(HV_SYS_REG_DBGBCR11_EL1, 2, 0, 0, 11, 5)
+DEF_SYSREG(HV_SYS_REG_DBGWVR11_EL1, 2, 0, 0, 11, 6)
+DEF_SYSREG(HV_SYS_REG_DBGWCR11_EL1, 2, 0, 0, 11, 7)
+
+DEF_SYSREG(HV_SYS_REG_DBGBVR12_EL1, 2, 0, 0, 12, 4)
+DEF_SYSREG(HV_SYS_REG_DBGBCR12_EL1, 2, 0, 0, 12, 5)
+DEF_SYSREG(HV_SYS_REG_DBGWVR12_EL1, 2, 0, 0, 12, 6)
+DEF_SYSREG(HV_SYS_REG_DBGWCR12_EL1, 2, 0, 0, 12, 7)
+
+DEF_SYSREG(HV_SYS_REG_DBGBVR13_EL1, 2, 0, 0, 13, 4)
+DEF_SYSREG(HV_SYS_REG_DBGBCR13_EL1, 2, 0, 0, 13, 5)
+DEF_SYSREG(HV_SYS_REG_DBGWVR13_EL1, 2, 0, 0, 13, 6)
+DEF_SYSREG(HV_SYS_REG_DBGWCR13_EL1, 2, 0, 0, 13, 7)
+
+DEF_SYSREG(HV_SYS_REG_DBGBVR14_EL1, 2, 0, 0, 14, 4)
+DEF_SYSREG(HV_SYS_REG_DBGBCR14_EL1, 2, 0, 0, 14, 5)
+DEF_SYSREG(HV_SYS_REG_DBGWVR14_EL1, 2, 0, 0, 14, 6)
+DEF_SYSREG(HV_SYS_REG_DBGWCR14_EL1, 2, 0, 0, 14, 7)
+
+DEF_SYSREG(HV_SYS_REG_DBGBVR15_EL1, 2, 0, 0, 15, 4)
+DEF_SYSREG(HV_SYS_REG_DBGBCR15_EL1, 2, 0, 0, 15, 5)
+DEF_SYSREG(HV_SYS_REG_DBGWVR15_EL1, 2, 0, 0, 15, 6)
+DEF_SYSREG(HV_SYS_REG_DBGWCR15_EL1, 2, 0, 0, 15, 7)
+
+#ifdef SYNC_NO_RAW_REGS
+/*
+ * The registers below are manually synced on init because they are
+ * marked as NO_RAW. We still list them to make number space sync easier.
+ */
+DEF_SYSREG(HV_SYS_REG_MDCCINT_EL1, 2, 0, 0, 2, 0)
+DEF_SYSREG(HV_SYS_REG_MIDR_EL1, 3, 0, 0, 0, 0)
+DEF_SYSREG(HV_SYS_REG_MPIDR_EL1, 3, 0, 0, 0, 5)
+DEF_SYSREG(HV_SYS_REG_ID_AA64PFR0_EL1, 3, 0, 0, 4, 0)
+#endif
+
+DEF_SYSREG(HV_SYS_REG_ID_AA64PFR1_EL1, 3, 0, 0, 4, 1)
+/* Add ID_AA64PFR2_EL1 here when HVF supports it */
+DEF_SYSREG(HV_SYS_REG_ID_AA64DFR0_EL1, 3, 0, 0, 5, 0)
+DEF_SYSREG(HV_SYS_REG_ID_AA64DFR1_EL1, 3, 0, 0, 5, 1)
+DEF_SYSREG(HV_SYS_REG_ID_AA64ISAR0_EL1, 3, 0, 0, 6, 0)
+DEF_SYSREG(HV_SYS_REG_ID_AA64ISAR1_EL1, 3, 0, 0, 6, 1)
+
+#ifdef SYNC_NO_MMFR0
+/* We keep the hardware MMFR0 around. HW limits are there anyway */
+DEF_SYSREG(HV_SYS_REG_ID_AA64MMFR0_EL1, 3, 0, 0, 7, 0)
+#endif
+
+DEF_SYSREG(HV_SYS_REG_ID_AA64MMFR1_EL1, 3, 0, 0, 7, 1)
+DEF_SYSREG(HV_SYS_REG_ID_AA64MMFR2_EL1, 3, 0, 0, 7, 2)
+/* Add ID_AA64MMFR3_EL1 here when HVF supports it */
+
+DEF_SYSREG(HV_SYS_REG_MDSCR_EL1, 2, 0, 0, 2, 2)
+DEF_SYSREG(HV_SYS_REG_SCTLR_EL1, 3, 0, 1, 0, 0)
+DEF_SYSREG(HV_SYS_REG_CPACR_EL1, 3, 0, 1, 0, 2)
+DEF_SYSREG(HV_SYS_REG_TTBR0_EL1, 3, 0, 2, 0, 0)
+DEF_SYSREG(HV_SYS_REG_TTBR1_EL1, 3, 0, 2, 0, 1)
+DEF_SYSREG(HV_SYS_REG_TCR_EL1, 3, 0, 2, 0, 2)
+
+DEF_SYSREG(HV_SYS_REG_APIAKEYLO_EL1, 3, 0, 2, 1, 0)
+DEF_SYSREG(HV_SYS_REG_APIAKEYHI_EL1, 3, 0, 2, 1, 1)
+DEF_SYSREG(HV_SYS_REG_APIBKEYLO_EL1, 3, 0, 2, 1, 2)
+DEF_SYSREG(HV_SYS_REG_APIBKEYHI_EL1, 3, 0, 2, 1, 3)
+DEF_SYSREG(HV_SYS_REG_APDAKEYLO_EL1, 3, 0, 2, 2, 0)
+DEF_SYSREG(HV_SYS_REG_APDAKEYHI_EL1, 3, 0, 2, 2, 1)
+DEF_SYSREG(HV_SYS_REG_APDBKEYLO_EL1, 3, 0, 2, 2, 2)
+DEF_SYSREG(HV_SYS_REG_APDBKEYHI_EL1, 3, 0, 2, 2, 3)
+DEF_SYSREG(HV_SYS_REG_APGAKEYLO_EL1, 3, 0, 2, 3, 0)
+DEF_SYSREG(HV_SYS_REG_APGAKEYHI_EL1, 3, 0, 2, 3, 1)
+
+DEF_SYSREG(HV_SYS_REG_SPSR_EL1, 3, 0, 4, 0, 0)
+DEF_SYSREG(HV_SYS_REG_ELR_EL1, 3, 0, 4, 0, 1)
+DEF_SYSREG(HV_SYS_REG_SP_EL0, 3, 0, 4, 1, 0)
+DEF_SYSREG(HV_SYS_REG_AFSR0_EL1, 3, 0, 5, 1, 0)
+DEF_SYSREG(HV_SYS_REG_AFSR1_EL1, 3, 0, 5, 1, 1)
+DEF_SYSREG(HV_SYS_REG_ESR_EL1, 3, 0, 5, 2, 0)
+DEF_SYSREG(HV_SYS_REG_FAR_EL1, 3, 0, 6, 0, 0)
+DEF_SYSREG(HV_SYS_REG_PAR_EL1, 3, 0, 7, 4, 0)
+DEF_SYSREG(HV_SYS_REG_MAIR_EL1, 3, 0, 10, 2, 0)
+DEF_SYSREG(HV_SYS_REG_AMAIR_EL1, 3, 0, 10, 3, 0)
+DEF_SYSREG(HV_SYS_REG_VBAR_EL1, 3, 0, 12, 0, 0)
+DEF_SYSREG(HV_SYS_REG_CONTEXTIDR_EL1, 3, 0, 13, 0, 1)
+DEF_SYSREG(HV_SYS_REG_TPIDR_EL1, 3, 0, 13, 0, 4)
+DEF_SYSREG(HV_SYS_REG_CNTKCTL_EL1, 3, 0, 14, 1, 0)
+DEF_SYSREG(HV_SYS_REG_CSSELR_EL1, 3, 2, 0, 0, 0)
+DEF_SYSREG(HV_SYS_REG_TPIDR_EL0, 3, 3, 13, 0, 2)
+DEF_SYSREG(HV_SYS_REG_TPIDRRO_EL0, 3, 3, 13, 0, 3)
+DEF_SYSREG(HV_SYS_REG_CNTV_CTL_EL0, 3, 3, 14, 3, 1)
+DEF_SYSREG(HV_SYS_REG_CNTV_CVAL_EL0, 3, 3, 14, 3, 2)
+DEF_SYSREG(HV_SYS_REG_SP_EL1, 3, 4, 4, 1, 0)
diff --git a/target/arm/internals.h b/target/arm/internals.h
index 0f7df97..1d958db 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -2004,4 +2004,7 @@ void vfp_clear_float_status_exc_flags(CPUARMState *env);
void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask);
bool arm_pan_enabled(CPUARMState *env);
+/* Compare uint64_t for qsort and bsearch. */
+int compare_u64(const void *a, const void *b);
+
#endif
diff --git a/target/arm/kvm-consts.h b/target/arm/kvm-consts.h
index c44d23d..54ae5da 100644
--- a/target/arm/kvm-consts.h
+++ b/target/arm/kvm-consts.h
@@ -160,9 +160,6 @@ MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A53, KVM_ARM_TARGET_CORTEX_A53);
#define CP_REG_ARM64_SYSREG_OP2_MASK 0x0000000000000007
#define CP_REG_ARM64_SYSREG_OP2_SHIFT 0
-/* No kernel define but it's useful to QEMU */
-#define CP_REG_ARM64_SYSREG_CP (CP_REG_ARM64_SYSREG >> CP_REG_ARM_COPROC_SHIFT)
-
MISMATCH_CHECK(CP_REG_ARM64, KVM_REG_ARM64);
MISMATCH_CHECK(CP_REG_ARM_COPROC_MASK, KVM_REG_ARM_COPROC_MASK);
MISMATCH_CHECK(CP_REG_ARM_COPROC_SHIFT, KVM_REG_ARM_COPROC_SHIFT);
@@ -180,4 +177,15 @@ MISMATCH_CHECK(CP_REG_ARM64_SYSREG_OP2_SHIFT, KVM_REG_ARM64_SYSREG_OP2_SHIFT);
#undef MISMATCH_CHECK
+#define KVMID_AA64_SYS_REG_(op0, op1, crn, crm, op2) \
+ (CP_REG_AA64_MASK | CP_REG_ARM64_SYSREG | \
+ ((op0) << CP_REG_ARM64_SYSREG_OP0_SHIFT) | \
+ ((op1) << CP_REG_ARM64_SYSREG_OP1_SHIFT) | \
+ ((crn) << CP_REG_ARM64_SYSREG_CRN_SHIFT) | \
+ ((crm) << CP_REG_ARM64_SYSREG_CRM_SHIFT) | \
+ ((op2) << CP_REG_ARM64_SYSREG_OP2_SHIFT))
+
+#define KVMID_AA64_SYS_REG64(op0, op1, crn, crm, op2) \
+ (KVMID_AA64_SYS_REG_(op0, op1, crn, crm, op2) | CP_REG_SIZE_U64)
+
#endif
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index c1ec665..b8a1c07 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -324,6 +324,7 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
err = 0;
} else {
err |= get_host_cpu_reg(fd, ahcf, ID_AA64PFR1_EL1_IDX);
+ err |= get_host_cpu_reg(fd, ahcf, ID_AA64PFR2_EL1_IDX);
err |= get_host_cpu_reg(fd, ahcf, ID_AA64SMFR0_EL1_IDX);
err |= get_host_cpu_reg(fd, ahcf, ID_AA64DFR0_EL1_IDX);
err |= get_host_cpu_reg(fd, ahcf, ID_AA64DFR1_EL1_IDX);
@@ -718,17 +719,6 @@ void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group,
memory_region_ref(kd->mr);
}
-static int compare_u64(const void *a, const void *b)
-{
- if (*(uint64_t *)a > *(uint64_t *)b) {
- return 1;
- }
- if (*(uint64_t *)a < *(uint64_t *)b) {
- return -1;
- }
- return 0;
-}
-
/*
* cpreg_values are sorted in ascending order by KVM register ID
* (see kvm_arm_init_cpreg_list). This allows us to cheaply find
diff --git a/target/arm/meson.build b/target/arm/meson.build
index 914f149..638ee62 100644
--- a/target/arm/meson.build
+++ b/target/arm/meson.build
@@ -28,12 +28,16 @@ arm_user_ss.add(files(
'vfp_fpscr.c',
'el2-stubs.c',
))
+arm_user_ss.add(when: 'CONFIG_ARM_COMPATIBLE_SEMIHOSTING',
+ if_true: files('common-semi-target.c'))
arm_common_system_ss.add(files('cpu.c'))
arm_common_system_ss.add(when: 'TARGET_AARCH64', if_false: files(
'cpu32-stubs.c'))
arm_common_system_ss.add(when: 'CONFIG_KVM', if_false: files('kvm-stub.c'))
arm_common_system_ss.add(when: 'CONFIG_HVF', if_false: files('hvf-stub.c'))
+arm_common_system_ss.add(when: 'CONFIG_ARM_COMPATIBLE_SEMIHOSTING',
+ if_true: files('common-semi-target.c'))
arm_common_system_ss.add(files(
'arch_dump.c',
'arm-powerctl.c',
diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c
index 0189422..17f83f1 100644
--- a/target/arm/tcg/hflags.c
+++ b/target/arm/tcg/hflags.c
@@ -258,6 +258,11 @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
DP_TBFLAG_A64(flags, TBII, tbii);
DP_TBFLAG_A64(flags, TBID, tbid);
+ /* E2H is used by both VHE and NV2. */
+ if (hcr & HCR_E2H) {
+ DP_TBFLAG_A64(flags, E2H, 1);
+ }
+
if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
int sve_el = sve_exception_el(env, el);
@@ -390,9 +395,6 @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
}
if (hcr & HCR_NV2) {
DP_TBFLAG_A64(flags, NV2, 1);
- if (hcr & HCR_E2H) {
- DP_TBFLAG_A64(flags, NV2_MEM_E20, 1);
- }
if (env->cp15.sctlr_el[2] & SCTLR_EE) {
DP_TBFLAG_A64(flags, NV2_MEM_BE, 1);
}
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 37bedc3..a0e3300 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -2455,6 +2455,19 @@ static void gen_sysreg_undef(DisasContext *s, bool isread,
gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
}
+/*
+ * Look up @key, returning the cpreg, which must exist.
+ * Additionally, the new cpreg must also be accessible.
+ */
+static const ARMCPRegInfo *
+redirect_cpreg(DisasContext *s, uint32_t key, bool isread)
+{
+ const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
+ assert(ri);
+ assert(cp_access_ok(s->current_el, ri, isread));
+ return ri;
+}
+
/* MRS - move from system register
* MSR (register) - move to system register
* SYS
@@ -2466,8 +2479,7 @@ static void handle_sys(DisasContext *s, bool isread,
unsigned int op0, unsigned int op1, unsigned int op2,
unsigned int crn, unsigned int crm, unsigned int rt)
{
- uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
- crn, crm, op0, op1, op2);
+ uint32_t key = ENCODE_AA64_CP_REG(op0, op1, crn, crm, op2);
const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
bool need_exit_tb = false;
bool nv_trap_to_el2 = false;
@@ -2561,6 +2573,27 @@ static void handle_sys(DisasContext *s, bool isread,
}
}
+ if (ri->vhe_redir_to_el2 && s->current_el == 2 && s->e2h) {
+ /*
+ * This is one of the FOO_EL1 registers which redirect to FOO_EL2
+ * from EL2 when HCR_EL2.E2H is set.
+ */
+ key = ri->vhe_redir_to_el2;
+ ri = redirect_cpreg(s, key, isread);
+ } else if (ri->vhe_redir_to_el01 && s->current_el >= 2) {
+ /*
+ * This is one of the FOO_EL12 or FOO_EL02 registers.
+ * With !E2H, they all UNDEF.
+ * With E2H, from EL2 or EL3, they redirect to FOO_EL1/FOO_EL0.
+ */
+ if (!s->e2h) {
+ gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
+ return;
+ }
+ key = ri->vhe_redir_to_el01;
+ ri = redirect_cpreg(s, key, isread);
+ }
+
if (ri->accessfn || (ri->fgt && s->fgt_active)) {
/* Emit code to perform further access permissions checks at
* runtime; this may result in an exception.
@@ -2603,11 +2636,8 @@ static void handle_sys(DisasContext *s, bool isread,
* We don't use the EL1 register's access function, and
* fine-grained-traps on EL1 also do not apply here.
*/
- key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
- crn, crm, op0, 0, op2);
- ri = get_arm_cp_reginfo(s->cp_regs, key);
- assert(ri);
- assert(cp_access_ok(s->current_el, ri, isread));
+ key = ENCODE_AA64_CP_REG(op0, 0, crn, crm, op2);
+ ri = redirect_cpreg(s, key, isread);
/*
* We might not have done an update_pc earlier, so check we don't
* need it. We could support this in future if necessary.
@@ -10306,10 +10336,11 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
dc->naa = EX_TBFLAG_A64(tb_flags, NAA);
+ dc->e2h = EX_TBFLAG_A64(tb_flags, E2H);
dc->nv = EX_TBFLAG_A64(tb_flags, NV);
dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1);
dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2);
- dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20);
+ dc->nv2_mem_e20 = dc->nv2 && dc->e2h;
dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH);
dc->fpcr_nep = EX_TBFLAG_A64(tb_flags, NEP);
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
index ec4755a..f1a6e5e 100644
--- a/target/arm/tcg/translate.h
+++ b/target/arm/tcg/translate.h
@@ -150,6 +150,8 @@ typedef struct DisasContext {
bool trap_eret;
/* True if FEAT_LSE2 SCTLR_ELx.nAA is set */
bool naa;
+ /* True if HCR_EL2.E2H is set */
+ bool e2h;
/* True if FEAT_NV HCR_EL2.NV is enabled */
bool nv;
/* True if NV enabled and HCR_EL2.NV1 is set */
diff --git a/target/arm/trace-events b/target/arm/trace-events
index 4438dce..72a2c7d 100644
--- a/target/arm/trace-events
+++ b/target/arm/trace-events
@@ -13,3 +13,13 @@ arm_gt_update_irq(int timer, int irqstate) "gt_update_irq: timer %d irqstate %d"
# kvm.c
kvm_arm_fixup_msi_route(uint64_t iova, uint64_t gpa) "MSI iova = 0x%"PRIx64" is translated into 0x%"PRIx64
+
+# cpu.c
+arm_cpu_reset(uint64_t mp_aff) "cpu %" PRIu64
+arm_emulate_firmware_reset(uint64_t mp_aff, unsigned target_el) "cpu %" PRIu64 " @EL%u"
+
+# arm-powerctl.c
+arm_powerctl_set_cpu_on(uint64_t mp_aff, unsigned target_el, const char *mode, uint64_t entry, uint64_t context_id) "cpu %" PRIu64 " (EL %u, %s) @ 0x%" PRIx64 " with R0 = 0x%" PRIx64
+arm_powerctl_set_cpu_on_and_reset(uint64_t mp_aff) "cpu %" PRIu64
+arm_powerctl_set_cpu_off(uint64_t mp_aff) "cpu %" PRIu64
+arm_powerctl_reset_cpu(uint64_t mp_aff) "cpu %" PRIu64
diff --git a/target/riscv/common-semi-target.h b/target/riscv/common-semi-target.c
index 7c8a59e..aeaeb88 100644
--- a/target/riscv/common-semi-target.h
+++ b/target/riscv/common-semi-target.c
@@ -8,43 +8,42 @@
* SPDX-License-Identifier: GPL-2.0-or-later
*/
-#ifndef TARGET_RISCV_COMMON_SEMI_TARGET_H
-#define TARGET_RISCV_COMMON_SEMI_TARGET_H
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "semihosting/common-semi.h"
-static inline target_ulong common_semi_arg(CPUState *cs, int argno)
+uint64_t common_semi_arg(CPUState *cs, int argno)
{
RISCVCPU *cpu = RISCV_CPU(cs);
CPURISCVState *env = &cpu->env;
return env->gpr[xA0 + argno];
}
-static inline void common_semi_set_ret(CPUState *cs, target_ulong ret)
+void common_semi_set_ret(CPUState *cs, uint64_t ret)
{
RISCVCPU *cpu = RISCV_CPU(cs);
CPURISCVState *env = &cpu->env;
env->gpr[xA0] = ret;
}
-static inline bool common_semi_sys_exit_extended(CPUState *cs, int nr)
+bool is_64bit_semihosting(CPUArchState *env)
{
- return (nr == TARGET_SYS_EXIT_EXTENDED || sizeof(target_ulong) == 8);
+ return riscv_cpu_mxl(env) != MXL_RV32;
}
-static inline bool is_64bit_semihosting(CPUArchState *env)
+bool common_semi_sys_exit_is_extended(CPUState *cs)
{
- return riscv_cpu_mxl(env) != MXL_RV32;
+ return is_64bit_semihosting(cpu_env(cs));
}
-static inline target_ulong common_semi_stack_bottom(CPUState *cs)
+uint64_t common_semi_stack_bottom(CPUState *cs)
{
RISCVCPU *cpu = RISCV_CPU(cs);
CPURISCVState *env = &cpu->env;
return env->gpr[xSP];
}
-static inline bool common_semi_has_synccache(CPUArchState *env)
+bool common_semi_has_synccache(CPUArchState *env)
{
return true;
}
-
-#endif
diff --git a/target/riscv/meson.build b/target/riscv/meson.build
index a4bd61e..fdefe88 100644
--- a/target/riscv/meson.build
+++ b/target/riscv/meson.build
@@ -8,6 +8,10 @@ gen = [
riscv_ss = ss.source_set()
riscv_ss.add(gen)
+
+riscv_ss.add(when: 'CONFIG_ARM_COMPATIBLE_SEMIHOSTING',
+ if_true: files('common-semi-target.c'))
+
riscv_ss.add(files(
'cpu.c',
'cpu_helper.c',
diff --git a/tests/functional/x86_64/meson.build b/tests/functional/x86_64/meson.build
index 967426c..f78eec5 100644
--- a/tests/functional/x86_64/meson.build
+++ b/tests/functional/x86_64/meson.build
@@ -33,6 +33,7 @@ tests_x86_64_system_thorough = [
'replay',
'reverse_debug',
'tuxrun',
+ 'vfio_user_client',
'virtio_balloon',
'virtio_gpu',
]
diff --git a/tests/functional/x86_64/test_vfio_user_client.py b/tests/functional/x86_64/test_vfio_user_client.py
new file mode 100755
index 0000000..8bc16e5
--- /dev/null
+++ b/tests/functional/x86_64/test_vfio_user_client.py
@@ -0,0 +1,201 @@
+#!/usr/bin/env python3
+#
+# Copyright (c) 2025 Nutanix, Inc.
+#
+# Author:
+# Mark Cave-Ayland <mark.caveayland@nutanix.com>
+# John Levon <john.levon@nutanix.com>
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+"""
+Check basic vfio-user-pci client functionality. The test starts two VMs:
+
+ - the server VM runs the libvfio-user "gpio" example server inside it,
+ piping vfio-user traffic between a local UNIX socket and a virtio-serial
+ port. On the host, the virtio-serial port is backed by a local socket.
+
+ - the client VM loads the gpio-pci-idio-16 kernel module, with the
+ vfio-user client connecting to the above local UNIX socket.
+
+This way, we don't depend on trying to run a vfio-user server on the host
+itself.
+
+Once both VMs are running, we run some basic configuration on the gpio device
+and verify that the server is logging the expected out. As this is consistent
+given the same VM images, we just do a simple direct comparison.
+"""
+
+import os
+
+from qemu_test import Asset
+from qemu_test import QemuSystemTest
+from qemu_test import exec_command_and_wait_for_pattern
+from qemu_test import wait_for_console_pattern
+
+# Exact output can vary, so we just sample for some expected lines.
+EXPECTED_SERVER_LINES = [
+ "gpio: adding DMA region [0, 0xc0000) offset=0 flags=0x3",
+ "gpio: devinfo flags 0x3, num_regions 9, num_irqs 5",
+ "gpio: region_info[0] offset 0 flags 0 size 0 argsz 32",
+ "gpio: region_info[1] offset 0 flags 0 size 0 argsz 32",
+ "gpio: region_info[2] offset 0 flags 0x3 size 256 argsz 32",
+ "gpio: region_info[3] offset 0 flags 0 size 0 argsz 32",
+ "gpio: region_info[4] offset 0 flags 0 size 0 argsz 32",
+ "gpio: region_info[5] offset 0 flags 0 size 0 argsz 32",
+ "gpio: region_info[7] offset 0 flags 0x3 size 256 argsz 32",
+ "gpio: region7: read 256 bytes at 0",
+ "gpio: region7: read 0 from (0x30:4)",
+ "gpio: cleared EROM",
+ "gpio: I/O space enabled",
+ "gpio: memory space enabled",
+ "gpio: SERR# enabled",
+ "gpio: region7: wrote 0x103 to (0x4:2)",
+ "gpio: I/O space enabled",
+ "gpio: memory space enabled",
+]
+
+class VfioUserClient(QemuSystemTest):
+ """vfio-user testing class."""
+
+ ASSET_REPO = 'https://github.com/mcayland-ntx/libvfio-user-test'
+
+ ASSET_KERNEL = Asset(
+ f'{ASSET_REPO}/raw/refs/heads/main/images/bzImage',
+ '40292fa6ce95d516e26bccf5974e138d0db65a6de0bc540cabae060fe9dea605'
+ )
+
+ ASSET_ROOTFS = Asset(
+ f'{ASSET_REPO}/raw/refs/heads/main/images/rootfs.ext2',
+ 'e1e3abae8aebb8e6e77f08b1c531caeacf46250c94c815655c6bbea59fc3d1c1'
+ )
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+
+ self.kernel_path = None
+ self.rootfs_path = None
+
+ def configure_server_vm_args(self, server_vm, sock_path):
+ """
+ Configuration for the server VM. Set up virtio-serial device backed by
+ the given socket path.
+ """
+ server_vm.add_args('-kernel', self.kernel_path)
+ server_vm.add_args('-append', 'console=ttyS0 root=/dev/sda')
+ server_vm.add_args('-drive',
+ f"file={self.rootfs_path},if=ide,format=raw,id=drv0")
+ server_vm.add_args('-snapshot')
+ server_vm.add_args('-chardev',
+ f"socket,id=sock0,path={sock_path},telnet=off,server=on,wait=off")
+ server_vm.add_args('-device', 'virtio-serial')
+ server_vm.add_args('-device',
+ 'virtserialport,chardev=sock0,name=org.fedoraproject.port.0')
+
+ def configure_client_vm_args(self, client_vm, sock_path):
+ """
+ Configuration for the client VM. Point the vfio-user-pci device to the
+ socket path configured above.
+ """
+
+ client_vm.add_args('-kernel', self.kernel_path)
+ client_vm.add_args('-append', 'console=ttyS0 root=/dev/sda')
+ client_vm.add_args('-drive',
+ f'file={self.rootfs_path},if=ide,format=raw,id=drv0')
+ client_vm.add_args('-snapshot')
+ client_vm.add_args('-device',
+ '{"driver":"vfio-user-pci",' +
+ '"socket":{"path": "%s", "type": "unix"}}' % sock_path)
+
+ def setup_vfio_user_pci_server(self, server_vm):
+ """
+ Start the libvfio-user server within the server VM, and arrange
+ for data to shuttle between its socket and the virtio serial port.
+ """
+ wait_for_console_pattern(self, 'login:', None, server_vm)
+ exec_command_and_wait_for_pattern(self, 'root', '#', None, server_vm)
+
+ exec_command_and_wait_for_pattern(self,
+ 'gpio-pci-idio-16 -v /tmp/vfio-user.sock >/var/tmp/gpio.out 2>&1 &',
+ '#', None, server_vm)
+
+ # wait for libvfio-user socket to appear
+ while True:
+ out = exec_command_and_wait_for_pattern(self,
+ 'ls --color=no /tmp/vfio-user.sock', '#', None, server_vm)
+ ls_out = out.decode().splitlines()[1].strip()
+ if ls_out == "/tmp/vfio-user.sock":
+ break
+
+ exec_command_and_wait_for_pattern(self,
+ 'socat UNIX-CONNECT:/tmp/vfio-user.sock /dev/vport0p1,ignoreeof ' +
+ ' &', '#', None, server_vm)
+
+ def test_vfio_user_pci(self):
+ """Run basic sanity test."""
+
+ self.set_machine('pc')
+ self.require_device('virtio-serial')
+ self.require_device('vfio-user-pci')
+
+ self.kernel_path = self.ASSET_KERNEL.fetch()
+ self.rootfs_path = self.ASSET_ROOTFS.fetch()
+
+ sock_dir = self.socket_dir()
+ socket_path = os.path.join(sock_dir.name, 'vfio-user.sock')
+
+ server_vm = self.get_vm(name='server')
+ server_vm.set_console()
+ self.configure_server_vm_args(server_vm, socket_path)
+
+ server_vm.launch()
+
+ self.log.debug('starting libvfio-user server')
+
+ self.setup_vfio_user_pci_server(server_vm)
+
+ client_vm = self.get_vm(name="client")
+ client_vm.set_console()
+ self.configure_client_vm_args(client_vm, socket_path)
+
+ try:
+ client_vm.launch()
+ except:
+ self.log.error('client VM failed to start, dumping server logs')
+ exec_command_and_wait_for_pattern(self, 'cat /var/tmp/gpio.out',
+ '#', None, server_vm)
+ raise
+
+ self.log.debug('waiting for client VM boot')
+
+ wait_for_console_pattern(self, 'login:', None, client_vm)
+ exec_command_and_wait_for_pattern(self, 'root', '#', None, client_vm)
+
+ #
+ # Here, we'd like to actually interact with the gpio device a little
+ # more as described at:
+ #
+ # https://github.com/nutanix/libvfio-user/blob/master/docs/qemu.md
+ #
+ # Unfortunately, the buildroot Linux kernel has some undiagnosed issue
+ # so we don't get /sys/class/gpio. Nonetheless just the basic
+ # initialization and setup is enough for basic testing of vfio-user.
+ #
+
+ self.log.debug('collecting libvfio-user server output')
+
+ out = exec_command_and_wait_for_pattern(self,
+ 'cat /var/tmp/gpio.out',
+ 'gpio: region2: wrote 0 to (0x1:1)',
+ None, server_vm)
+
+ gpio_server_out = [s for s in out.decode().splitlines()
+ if s.startswith("gpio:")]
+
+ for line in EXPECTED_SERVER_LINES:
+ if line not in gpio_server_out:
+ self.log.error(f'Missing server debug line: {line}')
+ self.fail(f'Missing server debug line: {line}')
+
+
+if __name__ == '__main__':
+ QemuSystemTest.main()