aboutsummaryrefslogtreecommitdiff
path: root/hw/riscv
diff options
context:
space:
mode:
Diffstat (limited to 'hw/riscv')
-rw-r--r--hw/riscv/Kconfig14
-rw-r--r--hw/riscv/boot.c159
-rw-r--r--hw/riscv/meson.build3
-rw-r--r--hw/riscv/microblaze-v-generic.c189
-rw-r--r--hw/riscv/microchip_pfsoc.c174
-rw-r--r--hw/riscv/numa.c2
-rw-r--r--hw/riscv/opentitan.c17
-rw-r--r--hw/riscv/riscv-iommu-bits.h468
-rw-r--r--hw/riscv/riscv-iommu-hpm.c381
-rw-r--r--hw/riscv/riscv-iommu-hpm.h33
-rw-r--r--hw/riscv/riscv-iommu-pci.c217
-rw-r--r--hw/riscv/riscv-iommu-sys.c248
-rw-r--r--hw/riscv/riscv-iommu.c2679
-rw-r--r--hw/riscv/riscv-iommu.h157
-rw-r--r--hw/riscv/riscv_hart.c105
-rw-r--r--hw/riscv/shakti_c.c21
-rw-r--r--hw/riscv/sifive_e.c11
-rw-r--r--hw/riscv/sifive_u.c40
-rw-r--r--hw/riscv/spike.c25
-rw-r--r--hw/riscv/trace-events26
-rw-r--r--hw/riscv/trace.h1
-rw-r--r--hw/riscv/virt-acpi-build.c278
-rw-r--r--hw/riscv/virt.c549
23 files changed, 5427 insertions, 370 deletions
diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig
index a2030e3..e6a0ac1 100644
--- a/hw/riscv/Kconfig
+++ b/hw/riscv/Kconfig
@@ -1,3 +1,6 @@
+config RISCV_IOMMU
+ bool
+
config RISCV_NUMA
bool
@@ -22,6 +25,14 @@ config MICROCHIP_PFSOC
select SIFIVE_PLIC
select UNIMP
+config MICROBLAZE_V
+ bool
+ default y
+ depends on RISCV32 || RISCV64
+ select XILINX
+ select XILINX_AXI
+ select XILINX_ETHLITE
+
config OPENTITAN
bool
default y
@@ -44,9 +55,10 @@ config RISCV_VIRT
select PCI
select PCI_EXPRESS_GENERIC_BRIDGE
select PFLASH_CFI01
- select SERIAL
+ select SERIAL_MM
select RISCV_ACLINT
select RISCV_APLIC
+ select RISCV_IOMMU
select RISCV_IMSIC
select SIFIVE_PLIC
select SIFIVE_TEST
diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c
index 47281ca..828a867 100644
--- a/hw/riscv/boot.c
+++ b/hw/riscv/boot.c
@@ -27,17 +27,17 @@
#include "hw/riscv/boot.h"
#include "hw/riscv/boot_opensbi.h"
#include "elf.h"
-#include "sysemu/device_tree.h"
-#include "sysemu/qtest.h"
-#include "sysemu/kvm.h"
-#include "sysemu/reset.h"
+#include "system/device_tree.h"
+#include "system/qtest.h"
+#include "system/kvm.h"
+#include "system/reset.h"
#include <libfdt.h>
bool riscv_is_32bit(RISCVHartArrayState *harts)
{
RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(&harts->harts[0]);
- return mcc->misa_mxl_max == MXL_RV32;
+ return mcc->def->misa_mxl_max == MXL_RV32;
}
/*
@@ -67,9 +67,16 @@ char *riscv_plic_hart_config_string(int hart_count)
return g_strjoinv(",", (char **)vals);
}
-target_ulong riscv_calc_kernel_start_addr(RISCVHartArrayState *harts,
+void riscv_boot_info_init(RISCVBootInfo *info, RISCVHartArrayState *harts)
+{
+ info->kernel_size = 0;
+ info->initrd_size = 0;
+ info->is_32bit = riscv_is_32bit(harts);
+}
+
+target_ulong riscv_calc_kernel_start_addr(RISCVBootInfo *info,
target_ulong firmware_end_addr) {
- if (riscv_is_32bit(harts)) {
+ if (info->is_32bit) {
return QEMU_ALIGN_UP(firmware_end_addr, 4 * MiB);
} else {
return QEMU_ALIGN_UP(firmware_end_addr, 2 * MiB);
@@ -128,11 +135,11 @@ char *riscv_find_firmware(const char *firmware_filename,
target_ulong riscv_find_and_load_firmware(MachineState *machine,
const char *default_machine_firmware,
- hwaddr firmware_load_addr,
+ hwaddr *firmware_load_addr,
symbol_fn_t sym_cb)
{
char *firmware_filename;
- target_ulong firmware_end_addr = firmware_load_addr;
+ target_ulong firmware_end_addr = *firmware_load_addr;
firmware_filename = riscv_find_firmware(machine->firmware,
default_machine_firmware);
@@ -148,7 +155,7 @@ target_ulong riscv_find_and_load_firmware(MachineState *machine,
}
target_ulong riscv_load_firmware(const char *firmware_filename,
- hwaddr firmware_load_addr,
+ hwaddr *firmware_load_addr,
symbol_fn_t sym_cb)
{
uint64_t firmware_entry, firmware_end;
@@ -159,22 +166,23 @@ target_ulong riscv_load_firmware(const char *firmware_filename,
if (load_elf_ram_sym(firmware_filename, NULL, NULL, NULL,
&firmware_entry, NULL, &firmware_end, NULL,
0, EM_RISCV, 1, 0, NULL, true, sym_cb) > 0) {
+ *firmware_load_addr = firmware_entry;
return firmware_end;
}
firmware_size = load_image_targphys_as(firmware_filename,
- firmware_load_addr,
+ *firmware_load_addr,
current_machine->ram_size, NULL);
if (firmware_size > 0) {
- return firmware_load_addr + firmware_size;
+ return *firmware_load_addr + firmware_size;
}
error_report("could not load firmware '%s'", firmware_filename);
exit(1);
}
-static void riscv_load_initrd(MachineState *machine, uint64_t kernel_entry)
+static void riscv_load_initrd(MachineState *machine, RISCVBootInfo *info)
{
const char *filename = machine->initrd_filename;
uint64_t mem_size = machine->ram_size;
@@ -195,7 +203,7 @@ static void riscv_load_initrd(MachineState *machine, uint64_t kernel_entry)
* halfway into RAM, and for boards with 1GB of RAM or more we put
* the initrd at 512MB.
*/
- start = kernel_entry + MIN(mem_size / 2, 512 * MiB);
+ start = info->image_low_addr + MIN(mem_size / 2, 512 * MiB);
size = load_ramdisk(filename, start, mem_size - start);
if (size == -1) {
@@ -206,6 +214,9 @@ static void riscv_load_initrd(MachineState *machine, uint64_t kernel_entry)
}
}
+ info->initrd_start = start;
+ info->initrd_size = size;
+
/* Some RISC-V machines (e.g. opentitan) don't have a fdt. */
if (fdt) {
end = start + size;
@@ -214,14 +225,14 @@ static void riscv_load_initrd(MachineState *machine, uint64_t kernel_entry)
}
}
-target_ulong riscv_load_kernel(MachineState *machine,
- RISCVHartArrayState *harts,
- target_ulong kernel_start_addr,
- bool load_initrd,
- symbol_fn_t sym_cb)
+void riscv_load_kernel(MachineState *machine,
+ RISCVBootInfo *info,
+ target_ulong kernel_start_addr,
+ bool load_initrd,
+ symbol_fn_t sym_cb)
{
const char *kernel_filename = machine->kernel_filename;
- uint64_t kernel_load_base, kernel_entry;
+ ssize_t kernel_size;
void *fdt = machine->fdt;
g_assert(kernel_filename != NULL);
@@ -233,21 +244,29 @@ target_ulong riscv_load_kernel(MachineState *machine,
* the (expected) load address load address. This allows kernels to have
* separate SBI and ELF entry points (used by FreeBSD, for example).
*/
- if (load_elf_ram_sym(kernel_filename, NULL, NULL, NULL,
- NULL, &kernel_load_base, NULL, NULL, 0,
- EM_RISCV, 1, 0, NULL, true, sym_cb) > 0) {
- kernel_entry = kernel_load_base;
+ kernel_size = load_elf_ram_sym(kernel_filename, NULL, NULL, NULL, NULL,
+ &info->image_low_addr, &info->image_high_addr,
+ NULL, ELFDATA2LSB, EM_RISCV,
+ 1, 0, NULL, true, sym_cb);
+ if (kernel_size > 0) {
+ info->kernel_size = kernel_size;
goto out;
}
- if (load_uimage_as(kernel_filename, &kernel_entry, NULL, NULL,
- NULL, NULL, NULL) > 0) {
+ kernel_size = load_uimage_as(kernel_filename, &info->image_low_addr,
+ NULL, NULL, NULL, NULL, NULL);
+ if (kernel_size > 0) {
+ info->kernel_size = kernel_size;
+ info->image_high_addr = info->image_low_addr + kernel_size;
goto out;
}
- if (load_image_targphys_as(kernel_filename, kernel_start_addr,
- current_machine->ram_size, NULL) > 0) {
- kernel_entry = kernel_start_addr;
+ kernel_size = load_image_targphys_as(kernel_filename, kernel_start_addr,
+ current_machine->ram_size, NULL);
+ if (kernel_size > 0) {
+ info->kernel_size = kernel_size;
+ info->image_low_addr = kernel_start_addr;
+ info->image_high_addr = info->image_low_addr + kernel_size;
goto out;
}
@@ -256,23 +275,21 @@ target_ulong riscv_load_kernel(MachineState *machine,
out:
/*
- * For 32 bit CPUs 'kernel_entry' can be sign-extended by
+ * For 32 bit CPUs 'image_low_addr' can be sign-extended by
* load_elf_ram_sym().
*/
- if (riscv_is_32bit(harts)) {
- kernel_entry = extract64(kernel_entry, 0, 32);
+ if (info->is_32bit) {
+ info->image_low_addr = extract64(info->image_low_addr, 0, 32);
}
if (load_initrd && machine->initrd_filename) {
- riscv_load_initrd(machine, kernel_entry);
+ riscv_load_initrd(machine, info);
}
if (fdt && machine->kernel_cmdline && *machine->kernel_cmdline) {
qemu_fdt_setprop_string(fdt, "/chosen", "bootargs",
machine->kernel_cmdline);
}
-
- return kernel_entry;
}
/*
@@ -292,11 +309,12 @@ out:
* The FDT is fdt_packed() during the calculation.
*/
uint64_t riscv_compute_fdt_addr(hwaddr dram_base, hwaddr dram_size,
- MachineState *ms)
+ MachineState *ms, RISCVBootInfo *info)
{
int ret = fdt_pack(ms->fdt);
hwaddr dram_end, temp;
int fdtsize;
+ uint64_t dtb_start, dtb_start_limit;
/* Should only fail if we've built a corrupted tree */
g_assert(ret == 0);
@@ -307,6 +325,17 @@ uint64_t riscv_compute_fdt_addr(hwaddr dram_base, hwaddr dram_size,
exit(1);
}
+ if (info->initrd_size) {
+ /* If initrd is successfully loaded, place DTB after it. */
+ dtb_start_limit = info->initrd_start + info->initrd_size;
+ } else if (info->kernel_size) {
+ /* If only kernel is successfully loaded, place DTB after it. */
+ dtb_start_limit = info->image_high_addr;
+ } else {
+ /* Otherwise, do not check DTB overlapping */
+ dtb_start_limit = 0;
+ }
+
/*
* A dram_size == 0, usually from a MemMapEntry[].size element,
* means that the DRAM block goes all the way to ms->ram_size.
@@ -316,13 +345,24 @@ uint64_t riscv_compute_fdt_addr(hwaddr dram_base, hwaddr dram_size,
/*
* We should put fdt as far as possible to avoid kernel/initrd overwriting
- * its content. But it should be addressable by 32 bit system as well.
- * Thus, put it at an 2MB aligned address that less than fdt size from the
- * end of dram or 3GB whichever is lesser.
+ * its content. But it should be addressable by 32 bit system as well in RV32.
+ * Thus, put it near to the end of dram in RV64, and put it near to the end
+ * of dram or 3GB whichever is lesser in RV32.
*/
- temp = (dram_base < 3072 * MiB) ? MIN(dram_end, 3072 * MiB) : dram_end;
+ if (!info->is_32bit) {
+ temp = dram_end;
+ } else {
+ temp = (dram_base < 3072 * MiB) ? MIN(dram_end, 3072 * MiB) : dram_end;
+ }
- return QEMU_ALIGN_DOWN(temp - fdtsize, 2 * MiB);
+ dtb_start = QEMU_ALIGN_DOWN(temp - fdtsize, 2 * MiB);
+
+ if (dtb_start_limit && (dtb_start < dtb_start_limit)) {
+ error_report("No enough memory to place DTB after kernel/initrd");
+ exit(1);
+ }
+
+ return dtb_start;
}
/*
@@ -334,35 +374,39 @@ void riscv_load_fdt(hwaddr fdt_addr, void *fdt)
uint32_t fdtsize = fdt_totalsize(fdt);
/* copy in the device tree */
- qemu_fdt_dumpdtb(fdt, fdtsize);
-
rom_add_blob_fixed_as("fdt", fdt, fdtsize, fdt_addr,
&address_space_memory);
qemu_register_reset_nosnapshotload(qemu_fdt_randomize_seeds,
rom_ptr_for_as(&address_space_memory, fdt_addr, fdtsize));
}
-void riscv_rom_copy_firmware_info(MachineState *machine, hwaddr rom_base,
- hwaddr rom_size, uint32_t reset_vec_size,
+void riscv_rom_copy_firmware_info(MachineState *machine,
+ RISCVHartArrayState *harts,
+ hwaddr rom_base, hwaddr rom_size,
+ uint32_t reset_vec_size,
uint64_t kernel_entry)
{
+ struct fw_dynamic_info32 dinfo32;
struct fw_dynamic_info dinfo;
size_t dinfo_len;
- if (sizeof(dinfo.magic) == 4) {
- dinfo.magic = cpu_to_le32(FW_DYNAMIC_INFO_MAGIC_VALUE);
- dinfo.version = cpu_to_le32(FW_DYNAMIC_INFO_VERSION);
- dinfo.next_mode = cpu_to_le32(FW_DYNAMIC_INFO_NEXT_MODE_S);
- dinfo.next_addr = cpu_to_le32(kernel_entry);
+ if (riscv_is_32bit(harts)) {
+ dinfo32.magic = cpu_to_le32(FW_DYNAMIC_INFO_MAGIC_VALUE);
+ dinfo32.version = cpu_to_le32(FW_DYNAMIC_INFO_VERSION);
+ dinfo32.next_mode = cpu_to_le32(FW_DYNAMIC_INFO_NEXT_MODE_S);
+ dinfo32.next_addr = cpu_to_le32(kernel_entry);
+ dinfo32.options = 0;
+ dinfo32.boot_hart = 0;
+ dinfo_len = sizeof(dinfo32);
} else {
dinfo.magic = cpu_to_le64(FW_DYNAMIC_INFO_MAGIC_VALUE);
dinfo.version = cpu_to_le64(FW_DYNAMIC_INFO_VERSION);
dinfo.next_mode = cpu_to_le64(FW_DYNAMIC_INFO_NEXT_MODE_S);
dinfo.next_addr = cpu_to_le64(kernel_entry);
+ dinfo.options = 0;
+ dinfo.boot_hart = 0;
+ dinfo_len = sizeof(dinfo);
}
- dinfo.options = 0;
- dinfo.boot_hart = 0;
- dinfo_len = sizeof(dinfo);
/**
* copy the dynamic firmware info. This information is specific to
@@ -374,7 +418,10 @@ void riscv_rom_copy_firmware_info(MachineState *machine, hwaddr rom_base,
exit(1);
}
- rom_add_blob_fixed_as("mrom.finfo", &dinfo, dinfo_len,
+ rom_add_blob_fixed_as("mrom.finfo",
+ riscv_is_32bit(harts) ?
+ (void *)&dinfo32 : (void *)&dinfo,
+ dinfo_len,
rom_base + reset_vec_size,
&address_space_memory);
}
@@ -430,7 +477,9 @@ void riscv_setup_rom_reset_vec(MachineState *machine, RISCVHartArrayState *harts
}
rom_add_blob_fixed_as("mrom.reset", reset_vec, sizeof(reset_vec),
rom_base, &address_space_memory);
- riscv_rom_copy_firmware_info(machine, rom_base, rom_size, sizeof(reset_vec),
+ riscv_rom_copy_firmware_info(machine, harts,
+ rom_base, rom_size,
+ sizeof(reset_vec),
kernel_entry);
}
diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build
index f872674..c22f3a7 100644
--- a/hw/riscv/meson.build
+++ b/hw/riscv/meson.build
@@ -10,5 +10,8 @@ riscv_ss.add(when: 'CONFIG_SIFIVE_U', if_true: files('sifive_u.c'))
riscv_ss.add(when: 'CONFIG_SPIKE', if_true: files('spike.c'))
riscv_ss.add(when: 'CONFIG_MICROCHIP_PFSOC', if_true: files('microchip_pfsoc.c'))
riscv_ss.add(when: 'CONFIG_ACPI', if_true: files('virt-acpi-build.c'))
+riscv_ss.add(when: 'CONFIG_RISCV_IOMMU', if_true: files(
+ 'riscv-iommu.c', 'riscv-iommu-pci.c', 'riscv-iommu-sys.c', 'riscv-iommu-hpm.c'))
+riscv_ss.add(when: 'CONFIG_MICROBLAZE_V', if_true: files('microblaze-v-generic.c'))
hw_arch += {'riscv': riscv_ss}
diff --git a/hw/riscv/microblaze-v-generic.c b/hw/riscv/microblaze-v-generic.c
new file mode 100644
index 0000000..e863c50
--- /dev/null
+++ b/hw/riscv/microblaze-v-generic.c
@@ -0,0 +1,189 @@
+/*
+ * QEMU model of Microblaze V generic board.
+ *
+ * based on hw/microblaze/petalogix_ml605_mmu.c
+ *
+ * Copyright (c) 2011 Michal Simek <monstr@monstr.eu>
+ * Copyright (c) 2011 PetaLogix
+ * Copyright (c) 2009 Edgar E. Iglesias.
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Written by Sai Pavan Boddu <sai.pavan.boddu@amd.com
+ * and by Michal Simek <michal.simek@amd.com>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "qapi/error.h"
+#include "cpu.h"
+#include "hw/sysbus.h"
+#include "system/system.h"
+#include "net/net.h"
+#include "hw/boards.h"
+#include "hw/char/serial-mm.h"
+#include "system/address-spaces.h"
+#include "hw/char/xilinx_uartlite.h"
+#include "hw/misc/unimp.h"
+
+#define LMB_BRAM_SIZE (128 * KiB)
+#define MEMORY_BASEADDR 0x80000000
+#define INTC_BASEADDR 0x41200000
+#define TIMER_BASEADDR 0x41c00000
+#define TIMER_BASEADDR2 0x41c10000
+#define UARTLITE_BASEADDR 0x40600000
+#define ETHLITE_BASEADDR 0x40e00000
+#define UART16550_BASEADDR 0x44a10000
+#define AXIENET_BASEADDR 0x40c00000
+#define AXIDMA_BASEADDR 0x41e00000
+#define GPIO_BASEADDR 0x40000000
+#define GPIO_BASEADDR2 0x40010000
+#define GPIO_BASEADDR3 0x40020000
+#define I2C_BASEADDR 0x40800000
+#define QSPI_BASEADDR 0x44a00000
+
+#define TIMER_IRQ 0
+#define UARTLITE_IRQ 1
+#define UART16550_IRQ 4
+#define ETHLITE_IRQ 5
+#define TIMER_IRQ2 6
+#define AXIENET_IRQ 7
+#define AXIDMA_IRQ1 8
+#define AXIDMA_IRQ0 9
+
+static void mb_v_generic_init(MachineState *machine)
+{
+ ram_addr_t ram_size = machine->ram_size;
+ DeviceState *dev, *dma, *eth0;
+ Object *ds, *cs;
+ int i;
+ RISCVCPU *cpu;
+ hwaddr ddr_base = MEMORY_BASEADDR;
+ MemoryRegion *phys_lmb_bram = g_new(MemoryRegion, 1);
+ MemoryRegion *phys_ram = g_new(MemoryRegion, 1);
+ qemu_irq irq[32];
+ MemoryRegion *sysmem = get_system_memory();
+
+ cpu = RISCV_CPU(object_new(machine->cpu_type));
+ object_property_set_bool(OBJECT(cpu), "h", false, NULL);
+ object_property_set_bool(OBJECT(cpu), "d", false, NULL);
+ qdev_realize(DEVICE(cpu), NULL, &error_abort);
+ /* Attach emulated BRAM through the LMB. */
+ memory_region_init_ram(phys_lmb_bram, NULL,
+ "mb_v.lmb_bram", LMB_BRAM_SIZE,
+ &error_fatal);
+ memory_region_add_subregion(sysmem, 0x00000000, phys_lmb_bram);
+
+ memory_region_init_ram(phys_ram, NULL, "mb_v.ram",
+ ram_size, &error_fatal);
+ memory_region_add_subregion(sysmem, ddr_base, phys_ram);
+
+ dev = qdev_new("xlnx.xps-intc");
+ qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE);
+ qdev_prop_set_uint32(dev, "kind-of-intr",
+ 1 << UARTLITE_IRQ);
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, INTC_BASEADDR);
+ sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0,
+ qdev_get_gpio_in(DEVICE(cpu), 11));
+ for (i = 0; i < 32; i++) {
+ irq[i] = qdev_get_gpio_in(dev, i);
+ }
+
+ /* Uartlite */
+ dev = qdev_new(TYPE_XILINX_UARTLITE);
+ qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE);
+ qdev_prop_set_chr(dev, "chardev", serial_hd(0));
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, UARTLITE_BASEADDR);
+ sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, irq[UARTLITE_IRQ]);
+
+ /* Full uart */
+ serial_mm_init(sysmem, UART16550_BASEADDR + 0x1000, 2,
+ irq[UART16550_IRQ], 115200, serial_hd(1),
+ DEVICE_LITTLE_ENDIAN);
+
+ /* 2 timers at irq 0 @ 100 Mhz. */
+ dev = qdev_new("xlnx.xps-timer");
+ qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE);
+ qdev_prop_set_uint32(dev, "one-timer-only", 0);
+ qdev_prop_set_uint32(dev, "clock-frequency", 100000000);
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, TIMER_BASEADDR);
+ sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, irq[TIMER_IRQ]);
+
+ /* 2 timers at irq 3 @ 100 Mhz. */
+ dev = qdev_new("xlnx.xps-timer");
+ qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE);
+ qdev_prop_set_uint32(dev, "one-timer-only", 0);
+ qdev_prop_set_uint32(dev, "clock-frequency", 100000000);
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, TIMER_BASEADDR2);
+ sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, irq[TIMER_IRQ2]);
+
+ /* Emaclite */
+ dev = qdev_new("xlnx.xps-ethernetlite");
+ qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE);
+ qemu_configure_nic_device(dev, true, NULL);
+ qdev_prop_set_uint32(dev, "tx-ping-pong", 0);
+ qdev_prop_set_uint32(dev, "rx-ping-pong", 0);
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, ETHLITE_BASEADDR);
+ sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, irq[ETHLITE_IRQ]);
+
+ /* axi ethernet and dma initialization. */
+ eth0 = qdev_new("xlnx.axi-ethernet");
+ dma = qdev_new("xlnx.axi-dma");
+
+ /* FIXME: attach to the sysbus instead */
+ object_property_add_child(qdev_get_machine(), "xilinx-eth", OBJECT(eth0));
+ object_property_add_child(qdev_get_machine(), "xilinx-dma", OBJECT(dma));
+
+ ds = object_property_get_link(OBJECT(dma),
+ "axistream-connected-target", NULL);
+ cs = object_property_get_link(OBJECT(dma),
+ "axistream-control-connected-target", NULL);
+ qemu_configure_nic_device(eth0, true, NULL);
+ qdev_prop_set_uint32(eth0, "rxmem", 0x1000);
+ qdev_prop_set_uint32(eth0, "txmem", 0x1000);
+ object_property_set_link(OBJECT(eth0), "axistream-connected", ds,
+ &error_abort);
+ object_property_set_link(OBJECT(eth0), "axistream-control-connected", cs,
+ &error_abort);
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(eth0), &error_fatal);
+ sysbus_mmio_map(SYS_BUS_DEVICE(eth0), 0, AXIENET_BASEADDR);
+ sysbus_connect_irq(SYS_BUS_DEVICE(eth0), 0, irq[AXIENET_IRQ]);
+
+ ds = object_property_get_link(OBJECT(eth0),
+ "axistream-connected-target", NULL);
+ cs = object_property_get_link(OBJECT(eth0),
+ "axistream-control-connected-target", NULL);
+ qdev_prop_set_uint32(dma, "freqhz", 100000000);
+ object_property_set_link(OBJECT(dma), "axistream-connected", ds,
+ &error_abort);
+ object_property_set_link(OBJECT(dma), "axistream-control-connected", cs,
+ &error_abort);
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(dma), &error_fatal);
+ sysbus_mmio_map(SYS_BUS_DEVICE(dma), 0, AXIDMA_BASEADDR);
+ sysbus_connect_irq(SYS_BUS_DEVICE(dma), 0, irq[AXIDMA_IRQ0]);
+ sysbus_connect_irq(SYS_BUS_DEVICE(dma), 1, irq[AXIDMA_IRQ1]);
+
+ /* unimplemented devices */
+ create_unimplemented_device("gpio", GPIO_BASEADDR, 0x10000);
+ create_unimplemented_device("gpio2", GPIO_BASEADDR2, 0x10000);
+ create_unimplemented_device("gpio3", GPIO_BASEADDR3, 0x10000);
+ create_unimplemented_device("i2c", I2C_BASEADDR, 0x10000);
+ create_unimplemented_device("qspi", QSPI_BASEADDR, 0x10000);
+}
+
+static void mb_v_generic_machine_init(MachineClass *mc)
+{
+ mc->desc = "AMD Microblaze-V generic platform";
+ mc->init = mb_v_generic_init;
+ mc->min_cpus = 1;
+ mc->max_cpus = 1;
+ mc->default_cpu_type = TYPE_RISCV_CPU_BASE;
+ mc->default_cpus = 1;
+}
+
+DEFINE_MACHINE("amd-microblaze-v-generic", mb_v_generic_machine_init)
diff --git a/hw/riscv/microchip_pfsoc.c b/hw/riscv/microchip_pfsoc.c
index 7725dfb..2e74783 100644
--- a/hw/riscv/microchip_pfsoc.c
+++ b/hw/riscv/microchip_pfsoc.c
@@ -39,6 +39,7 @@
#include "qemu/units.h"
#include "qemu/cutils.h"
#include "qapi/error.h"
+#include "qapi/visitor.h"
#include "hw/boards.h"
#include "hw/loader.h"
#include "hw/sysbus.h"
@@ -51,8 +52,8 @@
#include "hw/riscv/microchip_pfsoc.h"
#include "hw/intc/riscv_aclint.h"
#include "hw/intc/sifive_plic.h"
-#include "sysemu/device_tree.h"
-#include "sysemu/sysemu.h"
+#include "system/device_tree.h"
+#include "system/system.h"
/*
* The BIOS image used by this machine is called Hart Software Services (HSS).
@@ -61,9 +62,6 @@
#define BIOS_FILENAME "hss.bin"
#define RESET_VECTOR 0x20220000
-/* CLINT timebase frequency */
-#define CLINT_TIMEBASE_FREQ 1000000
-
/* GEM version */
#define GEM_REVISION 0x0107010c
@@ -193,6 +191,7 @@ static void microchip_pfsoc_soc_instance_init(Object *obj)
static void microchip_pfsoc_soc_realize(DeviceState *dev, Error **errp)
{
MachineState *ms = MACHINE(qdev_get_machine());
+ MicrochipIcicleKitState *iks = MICROCHIP_ICICLE_KIT_MACHINE(ms);
MicrochipPFSoCState *s = MICROCHIP_PFSOC(dev);
const MemMapEntry *memmap = microchip_pfsoc_memmap;
MemoryRegion *system_memory = get_system_memory();
@@ -253,7 +252,7 @@ static void microchip_pfsoc_soc_realize(DeviceState *dev, Error **errp)
memmap[MICROCHIP_PFSOC_CLINT].base + RISCV_ACLINT_SWI_SIZE,
RISCV_ACLINT_DEFAULT_MTIMER_SIZE, 0, ms->smp.cpus,
RISCV_ACLINT_DEFAULT_MTIMECMP, RISCV_ACLINT_DEFAULT_MTIME,
- CLINT_TIMEBASE_FREQ, false);
+ iks->clint_timebase_freq, false);
/* L2 cache controller */
create_unimplemented_device("microchip.pfsoc.l2cc",
@@ -479,7 +478,7 @@ static void microchip_pfsoc_soc_realize(DeviceState *dev, Error **errp)
qspi_xip_mem);
}
-static void microchip_pfsoc_soc_class_init(ObjectClass *oc, void *data)
+static void microchip_pfsoc_soc_class_init(ObjectClass *oc, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(oc);
@@ -516,11 +515,11 @@ static void microchip_icicle_kit_machine_init(MachineState *machine)
uint64_t mem_low_size, mem_high_size;
hwaddr firmware_load_addr;
const char *firmware_name;
- bool kernel_as_payload = false;
target_ulong firmware_end_addr, kernel_start_addr;
uint64_t kernel_entry;
- uint32_t fdt_load_addr;
+ uint64_t fdt_load_addr;
DriveInfo *dinfo = drive_get(IF_SD, 0, 0);
+ RISCVBootInfo boot_info;
/* Sanity check on RAM size */
if (machine->ram_size < mc->default_ram_size) {
@@ -578,65 +577,135 @@ static void microchip_icicle_kit_machine_init(MachineState *machine)
}
/*
- * We follow the following table to select which payload we execute.
- *
- * -bios | -kernel | payload
- * -------+------------+--------
- * N | N | HSS
- * Y | don't care | HSS
- * N | Y | kernel
+ * We follow the following table to select which firmware we use.
*
- * This ensures backwards compatibility with how we used to expose -bios
- * to users but allows them to run through direct kernel booting as well.
- *
- * When -kernel is used for direct boot, -dtb must be present to provide
- * a valid device tree for the board, as we don't generate device tree.
+ * -bios | -kernel | firmware
+ * --------------+------------+--------
+ * none | N | error
+ * none | Y | kernel
+ * NULL, default | N | BIOS_FILENAME
+ * NULL, default | Y | RISCV64_BIOS_BIN
+ * other | don't care | other
*/
-
- if (machine->kernel_filename && machine->dtb) {
- int fdt_size;
- machine->fdt = load_device_tree(machine->dtb, &fdt_size);
- if (!machine->fdt) {
- error_report("load_device_tree() failed");
+ if (machine->firmware && !strcmp(machine->firmware, "none")) {
+ if (!machine->kernel_filename) {
+ error_report("for -bios none, a kernel is required");
exit(1);
}
- firmware_name = RISCV64_BIOS_BIN;
- firmware_load_addr = memmap[MICROCHIP_PFSOC_DRAM_LO].base;
- kernel_as_payload = true;
- }
-
- if (!kernel_as_payload) {
- firmware_name = BIOS_FILENAME;
+ firmware_name = NULL;
+ firmware_load_addr = RESET_VECTOR;
+ } else if (!machine->firmware || !strcmp(machine->firmware, "default")) {
+ if (machine->kernel_filename) {
+ firmware_name = RISCV64_BIOS_BIN;
+ firmware_load_addr = memmap[MICROCHIP_PFSOC_DRAM_LO].base;
+ } else {
+ firmware_name = BIOS_FILENAME;
+ firmware_load_addr = RESET_VECTOR;
+ }
+ } else {
+ firmware_name = machine->firmware;
firmware_load_addr = RESET_VECTOR;
}
- /* Load the firmware */
- firmware_end_addr = riscv_find_and_load_firmware(machine, firmware_name,
- firmware_load_addr, NULL);
+ /* Load the firmware if necessary */
+ firmware_end_addr = firmware_load_addr;
+ if (firmware_name) {
+ char *filename = riscv_find_firmware(firmware_name, NULL);
+ if (filename) {
+ firmware_end_addr = riscv_load_firmware(filename,
+ &firmware_load_addr, NULL);
+ g_free(filename);
+ }
+ }
- if (kernel_as_payload) {
- kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc.u_cpus,
+ riscv_boot_info_init(&boot_info, &s->soc.u_cpus);
+ if (machine->kernel_filename) {
+ kernel_start_addr = riscv_calc_kernel_start_addr(&boot_info,
firmware_end_addr);
- kernel_entry = riscv_load_kernel(machine, &s->soc.u_cpus,
- kernel_start_addr, true, NULL);
+ riscv_load_kernel(machine, &boot_info, kernel_start_addr,
+ true, NULL);
+ kernel_entry = boot_info.image_low_addr;
+
+ if (machine->dtb) {
+ int fdt_size;
+ machine->fdt = load_device_tree(machine->dtb, &fdt_size);
+ if (!machine->fdt) {
+ error_report("load_device_tree() failed");
+ exit(1);
+ }
+
+ /* Compute the FDT load address in DRAM */
+ hwaddr kernel_ram_base = memmap[MICROCHIP_PFSOC_DRAM_LO].base;
+ hwaddr kernel_ram_size = memmap[MICROCHIP_PFSOC_DRAM_LO].size;
+
+ if (kernel_entry - kernel_ram_base >= kernel_ram_size) {
+ kernel_ram_base = memmap[MICROCHIP_PFSOC_DRAM_HI].base;
+ kernel_ram_size = mem_high_size;
+ }
+
+ fdt_load_addr = riscv_compute_fdt_addr(kernel_ram_base, kernel_ram_size,
+ machine, &boot_info);
+ riscv_load_fdt(fdt_load_addr, machine->fdt);
+ } else {
+ warn_report_once("The QEMU microchip-icicle-kit machine does not "
+ "generate a device tree, so no device tree is "
+ "being provided to the guest.");
+ fdt_load_addr = 0;
+ }
- /* Compute the fdt load address in dram */
- fdt_load_addr = riscv_compute_fdt_addr(memmap[MICROCHIP_PFSOC_DRAM_LO].base,
- memmap[MICROCHIP_PFSOC_DRAM_LO].size,
- machine);
- riscv_load_fdt(fdt_load_addr, machine->fdt);
+ hwaddr start_addr;
+ if (firmware_name) {
+ start_addr = firmware_load_addr;
+ } else {
+ start_addr = kernel_entry;
+ }
/* Load the reset vector */
- riscv_setup_rom_reset_vec(machine, &s->soc.u_cpus, firmware_load_addr,
+ riscv_setup_rom_reset_vec(machine, &s->soc.u_cpus, start_addr,
memmap[MICROCHIP_PFSOC_ENVM_DATA].base,
memmap[MICROCHIP_PFSOC_ENVM_DATA].size,
kernel_entry, fdt_load_addr);
}
}
-static void microchip_icicle_kit_machine_class_init(ObjectClass *oc, void *data)
+static void microchip_icicle_kit_set_clint_timebase_freq(Object *obj,
+ Visitor *v,
+ const char *name,
+ void *opaque,
+ Error **errp)
+{
+ MicrochipIcicleKitState *s = MICROCHIP_ICICLE_KIT_MACHINE(obj);
+ uint32_t value;
+
+ if (!visit_type_uint32(v, name, &value, errp)) {
+ return;
+ }
+
+ s->clint_timebase_freq = value;
+}
+
+static void microchip_icicle_kit_get_clint_timebase_freq(Object *obj,
+ Visitor *v,
+ const char *name,
+ void *opaque,
+ Error **errp)
+{
+ MicrochipIcicleKitState *s = MICROCHIP_ICICLE_KIT_MACHINE(obj);
+ uint32_t value = s->clint_timebase_freq;
+
+ visit_type_uint32(v, name, &value, errp);
+}
+
+static void microchip_icicle_kit_machine_instance_init(Object *obj)
+{
+ MicrochipIcicleKitState *m = MICROCHIP_ICICLE_KIT_MACHINE(obj);
+ m->clint_timebase_freq = 1000000;
+}
+
+static void microchip_icicle_kit_machine_class_init(ObjectClass *oc,
+ const void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
@@ -647,6 +716,7 @@ static void microchip_icicle_kit_machine_class_init(ObjectClass *oc, void *data)
mc->min_cpus = MICROCHIP_PFSOC_MANAGEMENT_CPU_COUNT + 1;
mc->default_cpus = mc->min_cpus;
mc->default_ram_id = "microchip.icicle.kit.ram";
+ mc->auto_create_sdcard = true;
/*
* Map 513 MiB high memory, the minimum required high memory size, because
@@ -656,12 +726,20 @@ static void microchip_icicle_kit_machine_class_init(ObjectClass *oc, void *data)
* See memory_tests() in mss_ddr.c in the HSS source code.
*/
mc->default_ram_size = 1537 * MiB;
+
+ object_class_property_add(oc, "clint-timebase-frequency", "uint32_t",
+ microchip_icicle_kit_get_clint_timebase_freq,
+ microchip_icicle_kit_set_clint_timebase_freq,
+ NULL, NULL);
+ object_class_property_set_description(oc, "clint-timebase-frequency",
+ "Set CLINT timebase frequency in Hz.");
}
static const TypeInfo microchip_icicle_kit_machine_typeinfo = {
.name = MACHINE_TYPE_NAME("microchip-icicle-kit"),
.parent = TYPE_MACHINE,
.class_init = microchip_icicle_kit_machine_class_init,
+ .instance_init = microchip_icicle_kit_machine_instance_init,
.instance_size = sizeof(MicrochipIcicleKitState),
};
diff --git a/hw/riscv/numa.c b/hw/riscv/numa.c
index cf686f4..7a7b012 100644
--- a/hw/riscv/numa.c
+++ b/hw/riscv/numa.c
@@ -23,7 +23,7 @@
#include "hw/boards.h"
#include "hw/qdev-properties.h"
#include "hw/riscv/numa.h"
-#include "sysemu/device_tree.h"
+#include "system/device_tree.h"
static bool numa_enabled(const MachineState *ms)
{
diff --git a/hw/riscv/opentitan.c b/hw/riscv/opentitan.c
index 436503f..d369a8a 100644
--- a/hw/riscv/opentitan.c
+++ b/hw/riscv/opentitan.c
@@ -27,7 +27,8 @@
#include "hw/misc/unimp.h"
#include "hw/riscv/boot.h"
#include "qemu/units.h"
-#include "sysemu/sysemu.h"
+#include "system/system.h"
+#include "system/address-spaces.h"
/*
* This version of the OpenTitan machine currently supports
@@ -81,6 +82,7 @@ static void opentitan_machine_init(MachineState *machine)
OpenTitanState *s = OPENTITAN_MACHINE(machine);
const MemMapEntry *memmap = ibex_memmap;
MemoryRegion *sys_mem = get_system_memory();
+ RISCVBootInfo boot_info;
if (machine->ram_size != mc->default_ram_size) {
char *sz = size_to_str(mc->default_ram_size);
@@ -98,17 +100,19 @@ static void opentitan_machine_init(MachineState *machine)
memmap[IBEX_DEV_RAM].base, machine->ram);
if (machine->firmware) {
- riscv_load_firmware(machine->firmware, memmap[IBEX_DEV_RAM].base, NULL);
+ hwaddr firmware_load_addr = memmap[IBEX_DEV_RAM].base;
+ riscv_load_firmware(machine->firmware, &firmware_load_addr, NULL);
}
+ riscv_boot_info_init(&boot_info, &s->soc.cpus);
if (machine->kernel_filename) {
- riscv_load_kernel(machine, &s->soc.cpus,
+ riscv_load_kernel(machine, &boot_info,
memmap[IBEX_DEV_RAM].base,
false, NULL);
}
}
-static void opentitan_machine_class_init(ObjectClass *oc, void *data)
+static void opentitan_machine_class_init(ObjectClass *oc, const void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
@@ -305,12 +309,11 @@ static void lowrisc_ibex_soc_realize(DeviceState *dev_soc, Error **errp)
memmap[IBEX_DEV_IBEX_CFG].base, memmap[IBEX_DEV_IBEX_CFG].size);
}
-static Property lowrisc_ibex_soc_props[] = {
+static const Property lowrisc_ibex_soc_props[] = {
DEFINE_PROP_UINT32("resetvec", LowRISCIbexSoCState, resetvec, 0x20000400),
- DEFINE_PROP_END_OF_LIST()
};
-static void lowrisc_ibex_soc_class_init(ObjectClass *oc, void *data)
+static void lowrisc_ibex_soc_class_init(ObjectClass *oc, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(oc);
diff --git a/hw/riscv/riscv-iommu-bits.h b/hw/riscv/riscv-iommu-bits.h
new file mode 100644
index 0000000..1017d73
--- /dev/null
+++ b/hw/riscv/riscv-iommu-bits.h
@@ -0,0 +1,468 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright © 2022-2023 Rivos Inc.
+ * Copyright © 2023 FORTH-ICS/CARV
+ * Copyright © 2023 RISC-V IOMMU Task Group
+ *
+ * RISC-V IOMMU - Register Layout and Data Structures.
+ *
+ * Based on the IOMMU spec version 1.0, 3/2023
+ * https://github.com/riscv-non-isa/riscv-iommu
+ */
+
+#ifndef HW_RISCV_IOMMU_BITS_H
+#define HW_RISCV_IOMMU_BITS_H
+
+#define RISCV_IOMMU_SPEC_DOT_VER 0x010
+
+#ifndef GENMASK_ULL
+#define GENMASK_ULL(h, l) (((~0ULL) >> (63 - (h) + (l))) << (l))
+#endif
+
+/*
+ * struct riscv_iommu_fq_record - Fault/Event Queue Record
+ * See section 3.2 for more info.
+ */
+struct riscv_iommu_fq_record {
+ uint64_t hdr;
+ uint64_t _reserved;
+ uint64_t iotval;
+ uint64_t iotval2;
+};
+/* Header fields */
+#define RISCV_IOMMU_FQ_HDR_CAUSE GENMASK_ULL(11, 0)
+#define RISCV_IOMMU_FQ_HDR_PID GENMASK_ULL(31, 12)
+#define RISCV_IOMMU_FQ_HDR_PV BIT_ULL(32)
+#define RISCV_IOMMU_FQ_HDR_TTYPE GENMASK_ULL(39, 34)
+#define RISCV_IOMMU_FQ_HDR_DID GENMASK_ULL(63, 40)
+
+/*
+ * struct riscv_iommu_pq_record - PCIe Page Request record
+ * For more infos on the PCIe Page Request queue see chapter 3.3.
+ */
+struct riscv_iommu_pq_record {
+ uint64_t hdr;
+ uint64_t payload;
+};
+/* Header fields */
+#define RISCV_IOMMU_PREQ_HDR_PID GENMASK_ULL(31, 12)
+#define RISCV_IOMMU_PREQ_HDR_PV BIT_ULL(32)
+#define RISCV_IOMMU_PREQ_HDR_PRIV BIT_ULL(33)
+#define RISCV_IOMMU_PREQ_HDR_EXEC BIT_ULL(34)
+#define RISCV_IOMMU_PREQ_HDR_DID GENMASK_ULL(63, 40)
+
+/* Payload fields */
+#define RISCV_IOMMU_PREQ_PAYLOAD_R BIT_ULL(0)
+#define RISCV_IOMMU_PREQ_PAYLOAD_W BIT_ULL(1)
+#define RISCV_IOMMU_PREQ_PAYLOAD_L BIT_ULL(2)
+#define RISCV_IOMMU_PREQ_PAYLOAD_M GENMASK_ULL(2, 0)
+#define RISCV_IOMMU_PREQ_PRG_INDEX GENMASK_ULL(11, 3)
+#define RISCV_IOMMU_PREQ_UADDR GENMASK_ULL(63, 12)
+
+/* Common field positions */
+#define RISCV_IOMMU_PPN_FIELD GENMASK_ULL(53, 10)
+#define RISCV_IOMMU_QUEUE_LOGSZ_FIELD GENMASK_ULL(4, 0)
+#define RISCV_IOMMU_QUEUE_INDEX_FIELD GENMASK_ULL(31, 0)
+#define RISCV_IOMMU_QUEUE_ENABLE BIT(0)
+#define RISCV_IOMMU_QUEUE_INTR_ENABLE BIT(1)
+#define RISCV_IOMMU_QUEUE_MEM_FAULT BIT(8)
+#define RISCV_IOMMU_QUEUE_OVERFLOW BIT(9)
+#define RISCV_IOMMU_QUEUE_ACTIVE BIT(16)
+#define RISCV_IOMMU_QUEUE_BUSY BIT(17)
+#define RISCV_IOMMU_ATP_PPN_FIELD GENMASK_ULL(43, 0)
+#define RISCV_IOMMU_ATP_MODE_FIELD GENMASK_ULL(63, 60)
+
+/* 5.3 IOMMU Capabilities (64bits) */
+#define RISCV_IOMMU_REG_CAP 0x0000
+#define RISCV_IOMMU_CAP_VERSION GENMASK_ULL(7, 0)
+#define RISCV_IOMMU_CAP_SV32 BIT_ULL(8)
+#define RISCV_IOMMU_CAP_SV39 BIT_ULL(9)
+#define RISCV_IOMMU_CAP_SV48 BIT_ULL(10)
+#define RISCV_IOMMU_CAP_SV57 BIT_ULL(11)
+#define RISCV_IOMMU_CAP_SV32X4 BIT_ULL(16)
+#define RISCV_IOMMU_CAP_SV39X4 BIT_ULL(17)
+#define RISCV_IOMMU_CAP_SV48X4 BIT_ULL(18)
+#define RISCV_IOMMU_CAP_SV57X4 BIT_ULL(19)
+#define RISCV_IOMMU_CAP_MSI_FLAT BIT_ULL(22)
+#define RISCV_IOMMU_CAP_MSI_MRIF BIT_ULL(23)
+#define RISCV_IOMMU_CAP_ATS BIT_ULL(25)
+#define RISCV_IOMMU_CAP_T2GPA BIT_ULL(26)
+#define RISCV_IOMMU_CAP_IGS GENMASK_ULL(29, 28)
+#define RISCV_IOMMU_CAP_HPM BIT_ULL(30)
+#define RISCV_IOMMU_CAP_DBG BIT_ULL(31)
+#define RISCV_IOMMU_CAP_PAS GENMASK_ULL(37, 32)
+#define RISCV_IOMMU_CAP_PD8 BIT_ULL(38)
+#define RISCV_IOMMU_CAP_PD17 BIT_ULL(39)
+#define RISCV_IOMMU_CAP_PD20 BIT_ULL(40)
+
+enum riscv_iommu_igs_modes {
+ RISCV_IOMMU_CAP_IGS_MSI = 0,
+ RISCV_IOMMU_CAP_IGS_WSI,
+ RISCV_IOMMU_CAP_IGS_BOTH
+};
+
+/* 5.4 Features control register (32bits) */
+#define RISCV_IOMMU_REG_FCTL 0x0008
+#define RISCV_IOMMU_FCTL_BE BIT(0)
+#define RISCV_IOMMU_FCTL_WSI BIT(1)
+#define RISCV_IOMMU_FCTL_GXL BIT(2)
+
+/* 5.5 Device-directory-table pointer (64bits) */
+#define RISCV_IOMMU_REG_DDTP 0x0010
+#define RISCV_IOMMU_DDTP_MODE GENMASK_ULL(3, 0)
+#define RISCV_IOMMU_DDTP_BUSY BIT_ULL(4)
+#define RISCV_IOMMU_DDTP_PPN RISCV_IOMMU_PPN_FIELD
+
+enum riscv_iommu_ddtp_modes {
+ RISCV_IOMMU_DDTP_MODE_OFF = 0,
+ RISCV_IOMMU_DDTP_MODE_BARE = 1,
+ RISCV_IOMMU_DDTP_MODE_1LVL = 2,
+ RISCV_IOMMU_DDTP_MODE_2LVL = 3,
+ RISCV_IOMMU_DDTP_MODE_3LVL = 4,
+ RISCV_IOMMU_DDTP_MODE_MAX = 4
+};
+
+/* 5.6 Command Queue Base (64bits) */
+#define RISCV_IOMMU_REG_CQB 0x0018
+#define RISCV_IOMMU_CQB_LOG2SZ RISCV_IOMMU_QUEUE_LOGSZ_FIELD
+#define RISCV_IOMMU_CQB_PPN RISCV_IOMMU_PPN_FIELD
+
+/* 5.7 Command Queue head (32bits) */
+#define RISCV_IOMMU_REG_CQH 0x0020
+
+/* 5.8 Command Queue tail (32bits) */
+#define RISCV_IOMMU_REG_CQT 0x0024
+
+/* 5.9 Fault Queue Base (64bits) */
+#define RISCV_IOMMU_REG_FQB 0x0028
+#define RISCV_IOMMU_FQB_LOG2SZ RISCV_IOMMU_QUEUE_LOGSZ_FIELD
+#define RISCV_IOMMU_FQB_PPN RISCV_IOMMU_PPN_FIELD
+
+/* 5.10 Fault Queue Head (32bits) */
+#define RISCV_IOMMU_REG_FQH 0x0030
+
+/* 5.11 Fault Queue tail (32bits) */
+#define RISCV_IOMMU_REG_FQT 0x0034
+
+/* 5.12 Page Request Queue base (64bits) */
+#define RISCV_IOMMU_REG_PQB 0x0038
+#define RISCV_IOMMU_PQB_LOG2SZ RISCV_IOMMU_QUEUE_LOGSZ_FIELD
+#define RISCV_IOMMU_PQB_PPN RISCV_IOMMU_PPN_FIELD
+
+/* 5.13 Page Request Queue head (32bits) */
+#define RISCV_IOMMU_REG_PQH 0x0040
+
+/* 5.14 Page Request Queue tail (32bits) */
+#define RISCV_IOMMU_REG_PQT 0x0044
+
+/* 5.15 Command Queue CSR (32bits) */
+#define RISCV_IOMMU_REG_CQCSR 0x0048
+#define RISCV_IOMMU_CQCSR_CQEN RISCV_IOMMU_QUEUE_ENABLE
+#define RISCV_IOMMU_CQCSR_CIE RISCV_IOMMU_QUEUE_INTR_ENABLE
+#define RISCV_IOMMU_CQCSR_CQMF RISCV_IOMMU_QUEUE_MEM_FAULT
+#define RISCV_IOMMU_CQCSR_CMD_TO BIT(9)
+#define RISCV_IOMMU_CQCSR_CMD_ILL BIT(10)
+#define RISCV_IOMMU_CQCSR_FENCE_W_IP BIT(11)
+#define RISCV_IOMMU_CQCSR_CQON RISCV_IOMMU_QUEUE_ACTIVE
+#define RISCV_IOMMU_CQCSR_BUSY RISCV_IOMMU_QUEUE_BUSY
+
+/* 5.16 Fault Queue CSR (32bits) */
+#define RISCV_IOMMU_REG_FQCSR 0x004C
+#define RISCV_IOMMU_FQCSR_FQEN RISCV_IOMMU_QUEUE_ENABLE
+#define RISCV_IOMMU_FQCSR_FIE RISCV_IOMMU_QUEUE_INTR_ENABLE
+#define RISCV_IOMMU_FQCSR_FQMF RISCV_IOMMU_QUEUE_MEM_FAULT
+#define RISCV_IOMMU_FQCSR_FQOF RISCV_IOMMU_QUEUE_OVERFLOW
+#define RISCV_IOMMU_FQCSR_FQON RISCV_IOMMU_QUEUE_ACTIVE
+#define RISCV_IOMMU_FQCSR_BUSY RISCV_IOMMU_QUEUE_BUSY
+
+/* 5.17 Page Request Queue CSR (32bits) */
+#define RISCV_IOMMU_REG_PQCSR 0x0050
+#define RISCV_IOMMU_PQCSR_PQEN RISCV_IOMMU_QUEUE_ENABLE
+#define RISCV_IOMMU_PQCSR_PIE RISCV_IOMMU_QUEUE_INTR_ENABLE
+#define RISCV_IOMMU_PQCSR_PQMF RISCV_IOMMU_QUEUE_MEM_FAULT
+#define RISCV_IOMMU_PQCSR_PQOF RISCV_IOMMU_QUEUE_OVERFLOW
+#define RISCV_IOMMU_PQCSR_PQON RISCV_IOMMU_QUEUE_ACTIVE
+#define RISCV_IOMMU_PQCSR_BUSY RISCV_IOMMU_QUEUE_BUSY
+
+/* 5.18 Interrupt Pending Status (32bits) */
+#define RISCV_IOMMU_REG_IPSR 0x0054
+#define RISCV_IOMMU_IPSR_CIP BIT(0)
+#define RISCV_IOMMU_IPSR_FIP BIT(1)
+#define RISCV_IOMMU_IPSR_PIP BIT(3)
+
+enum {
+ RISCV_IOMMU_INTR_CQ,
+ RISCV_IOMMU_INTR_FQ,
+ RISCV_IOMMU_INTR_PM,
+ RISCV_IOMMU_INTR_PQ,
+ RISCV_IOMMU_INTR_COUNT
+};
+
+#define RISCV_IOMMU_IOCOUNT_NUM 31
+
+/* 5.19 Performance monitoring counter overflow status (32bits) */
+#define RISCV_IOMMU_REG_IOCOUNTOVF 0x0058
+#define RISCV_IOMMU_IOCOUNTOVF_CY BIT(0)
+
+/* 5.20 Performance monitoring counter inhibits (32bits) */
+#define RISCV_IOMMU_REG_IOCOUNTINH 0x005C
+#define RISCV_IOMMU_IOCOUNTINH_CY BIT(0)
+
+/* 5.21 Performance monitoring cycles counter (64bits) */
+#define RISCV_IOMMU_REG_IOHPMCYCLES 0x0060
+#define RISCV_IOMMU_IOHPMCYCLES_COUNTER GENMASK_ULL(62, 0)
+#define RISCV_IOMMU_IOHPMCYCLES_OVF BIT_ULL(63)
+
+/* 5.22 Performance monitoring event counters (31 * 64bits) */
+#define RISCV_IOMMU_REG_IOHPMCTR_BASE 0x0068
+#define RISCV_IOMMU_REG_IOHPMCTR(_n) \
+ (RISCV_IOMMU_REG_IOHPMCTR_BASE + (_n * 0x8))
+
+/* 5.23 Performance monitoring event selectors (31 * 64bits) */
+#define RISCV_IOMMU_REG_IOHPMEVT_BASE 0x0160
+#define RISCV_IOMMU_REG_IOHPMEVT(_n) \
+ (RISCV_IOMMU_REG_IOHPMEVT_BASE + (_n * 0x8))
+#define RISCV_IOMMU_IOHPMEVT_EVENT_ID GENMASK_ULL(14, 0)
+#define RISCV_IOMMU_IOHPMEVT_DMASK BIT_ULL(15)
+#define RISCV_IOMMU_IOHPMEVT_PID_PSCID GENMASK_ULL(35, 16)
+#define RISCV_IOMMU_IOHPMEVT_DID_GSCID GENMASK_ULL(59, 36)
+#define RISCV_IOMMU_IOHPMEVT_PV_PSCV BIT_ULL(60)
+#define RISCV_IOMMU_IOHPMEVT_DV_GSCV BIT_ULL(61)
+#define RISCV_IOMMU_IOHPMEVT_IDT BIT_ULL(62)
+#define RISCV_IOMMU_IOHPMEVT_OF BIT_ULL(63)
+
+enum RISCV_IOMMU_HPMEVENT_id {
+ RISCV_IOMMU_HPMEVENT_INVALID = 0,
+ RISCV_IOMMU_HPMEVENT_URQ = 1,
+ RISCV_IOMMU_HPMEVENT_TRQ = 2,
+ RISCV_IOMMU_HPMEVENT_ATS_RQ = 3,
+ RISCV_IOMMU_HPMEVENT_TLB_MISS = 4,
+ RISCV_IOMMU_HPMEVENT_DD_WALK = 5,
+ RISCV_IOMMU_HPMEVENT_PD_WALK = 6,
+ RISCV_IOMMU_HPMEVENT_S_VS_WALKS = 7,
+ RISCV_IOMMU_HPMEVENT_G_WALKS = 8,
+ RISCV_IOMMU_HPMEVENT_MAX = 9
+};
+
+/* 5.24 Translation request IOVA (64bits) */
+#define RISCV_IOMMU_REG_TR_REQ_IOVA 0x0258
+
+/* 5.25 Translation request control (64bits) */
+#define RISCV_IOMMU_REG_TR_REQ_CTL 0x0260
+#define RISCV_IOMMU_TR_REQ_CTL_GO_BUSY BIT_ULL(0)
+#define RISCV_IOMMU_TR_REQ_CTL_NW BIT_ULL(3)
+#define RISCV_IOMMU_TR_REQ_CTL_PID GENMASK_ULL(31, 12)
+#define RISCV_IOMMU_TR_REQ_CTL_DID GENMASK_ULL(63, 40)
+
+/* 5.26 Translation request response (64bits) */
+#define RISCV_IOMMU_REG_TR_RESPONSE 0x0268
+#define RISCV_IOMMU_TR_RESPONSE_FAULT BIT_ULL(0)
+#define RISCV_IOMMU_TR_RESPONSE_S BIT_ULL(9)
+#define RISCV_IOMMU_TR_RESPONSE_PPN RISCV_IOMMU_PPN_FIELD
+
+/* 5.27 Interrupt cause to vector (64bits) */
+#define RISCV_IOMMU_REG_ICVEC 0x02F8
+#define RISCV_IOMMU_ICVEC_CIV GENMASK_ULL(3, 0)
+#define RISCV_IOMMU_ICVEC_FIV GENMASK_ULL(7, 4)
+#define RISCV_IOMMU_ICVEC_PMIV GENMASK_ULL(11, 8)
+#define RISCV_IOMMU_ICVEC_PIV GENMASK_ULL(15, 12)
+
+/* 5.28 MSI Configuration table (32 * 64bits) */
+#define RISCV_IOMMU_REG_MSI_CONFIG 0x0300
+
+#define RISCV_IOMMU_REG_SIZE 0x1000
+
+#define RISCV_IOMMU_DDTE_VALID BIT_ULL(0)
+#define RISCV_IOMMU_DDTE_PPN RISCV_IOMMU_PPN_FIELD
+
+/* Struct riscv_iommu_dc - Device Context - section 2.1 */
+struct riscv_iommu_dc {
+ uint64_t tc;
+ uint64_t iohgatp;
+ uint64_t ta;
+ uint64_t fsc;
+ uint64_t msiptp;
+ uint64_t msi_addr_mask;
+ uint64_t msi_addr_pattern;
+ uint64_t _reserved;
+};
+
+/* Translation control fields */
+#define RISCV_IOMMU_DC_TC_V BIT_ULL(0)
+#define RISCV_IOMMU_DC_TC_EN_ATS BIT_ULL(1)
+#define RISCV_IOMMU_DC_TC_EN_PRI BIT_ULL(2)
+#define RISCV_IOMMU_DC_TC_T2GPA BIT_ULL(3)
+#define RISCV_IOMMU_DC_TC_DTF BIT_ULL(4)
+#define RISCV_IOMMU_DC_TC_PDTV BIT_ULL(5)
+#define RISCV_IOMMU_DC_TC_PRPR BIT_ULL(6)
+#define RISCV_IOMMU_DC_TC_GADE BIT_ULL(7)
+#define RISCV_IOMMU_DC_TC_SADE BIT_ULL(8)
+#define RISCV_IOMMU_DC_TC_DPE BIT_ULL(9)
+#define RISCV_IOMMU_DC_TC_SBE BIT_ULL(10)
+#define RISCV_IOMMU_DC_TC_SXL BIT_ULL(11)
+
+/* Second-stage (aka G-stage) context fields */
+#define RISCV_IOMMU_DC_IOHGATP_PPN RISCV_IOMMU_ATP_PPN_FIELD
+#define RISCV_IOMMU_DC_IOHGATP_GSCID GENMASK_ULL(59, 44)
+#define RISCV_IOMMU_DC_IOHGATP_MODE RISCV_IOMMU_ATP_MODE_FIELD
+
+enum riscv_iommu_dc_iohgatp_modes {
+ RISCV_IOMMU_DC_IOHGATP_MODE_BARE = 0,
+ RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4 = 8,
+ RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4 = 8,
+ RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4 = 9,
+ RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4 = 10
+};
+
+/* Translation attributes fields */
+#define RISCV_IOMMU_DC_TA_PSCID GENMASK_ULL(31, 12)
+
+/* First-stage context fields */
+#define RISCV_IOMMU_DC_FSC_PPN RISCV_IOMMU_ATP_PPN_FIELD
+#define RISCV_IOMMU_DC_FSC_MODE RISCV_IOMMU_ATP_MODE_FIELD
+
+/* Generic I/O MMU command structure - check section 3.1 */
+struct riscv_iommu_command {
+ uint64_t dword0;
+ uint64_t dword1;
+};
+
+#define RISCV_IOMMU_CMD_OPCODE GENMASK_ULL(6, 0)
+#define RISCV_IOMMU_CMD_FUNC GENMASK_ULL(9, 7)
+
+#define RISCV_IOMMU_CMD_IOTINVAL_OPCODE 1
+#define RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA 0
+#define RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA 1
+#define RISCV_IOMMU_CMD_IOTINVAL_AV BIT_ULL(10)
+#define RISCV_IOMMU_CMD_IOTINVAL_PSCID GENMASK_ULL(31, 12)
+#define RISCV_IOMMU_CMD_IOTINVAL_PSCV BIT_ULL(32)
+#define RISCV_IOMMU_CMD_IOTINVAL_GV BIT_ULL(33)
+#define RISCV_IOMMU_CMD_IOTINVAL_GSCID GENMASK_ULL(59, 44)
+
+#define RISCV_IOMMU_CMD_IOFENCE_OPCODE 2
+#define RISCV_IOMMU_CMD_IOFENCE_FUNC_C 0
+#define RISCV_IOMMU_CMD_IOFENCE_AV BIT_ULL(10)
+#define RISCV_IOMMU_CMD_IOFENCE_DATA GENMASK_ULL(63, 32)
+
+#define RISCV_IOMMU_CMD_IODIR_OPCODE 3
+#define RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT 0
+#define RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT 1
+#define RISCV_IOMMU_CMD_IODIR_PID GENMASK_ULL(31, 12)
+#define RISCV_IOMMU_CMD_IODIR_DV BIT_ULL(33)
+#define RISCV_IOMMU_CMD_IODIR_DID GENMASK_ULL(63, 40)
+
+/* 3.1.4 I/O MMU PCIe ATS */
+#define RISCV_IOMMU_CMD_ATS_OPCODE 4
+#define RISCV_IOMMU_CMD_ATS_FUNC_INVAL 0
+#define RISCV_IOMMU_CMD_ATS_FUNC_PRGR 1
+#define RISCV_IOMMU_CMD_ATS_PID GENMASK_ULL(31, 12)
+#define RISCV_IOMMU_CMD_ATS_PV BIT_ULL(32)
+#define RISCV_IOMMU_CMD_ATS_DSV BIT_ULL(33)
+#define RISCV_IOMMU_CMD_ATS_RID GENMASK_ULL(55, 40)
+#define RISCV_IOMMU_CMD_ATS_DSEG GENMASK_ULL(63, 56)
+/* dword1 is the ATS payload, two different payload types for INVAL and PRGR */
+
+/* ATS.PRGR payload */
+#define RISCV_IOMMU_CMD_ATS_PRGR_RESP_CODE GENMASK_ULL(47, 44)
+
+enum riscv_iommu_dc_fsc_atp_modes {
+ RISCV_IOMMU_DC_FSC_MODE_BARE = 0,
+ RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV32 = 8,
+ RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39 = 8,
+ RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48 = 9,
+ RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57 = 10,
+ RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8 = 1,
+ RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17 = 2,
+ RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20 = 3
+};
+
+enum riscv_iommu_fq_causes {
+ RISCV_IOMMU_FQ_CAUSE_INST_FAULT = 1,
+ RISCV_IOMMU_FQ_CAUSE_RD_ADDR_MISALIGNED = 4,
+ RISCV_IOMMU_FQ_CAUSE_RD_FAULT = 5,
+ RISCV_IOMMU_FQ_CAUSE_WR_ADDR_MISALIGNED = 6,
+ RISCV_IOMMU_FQ_CAUSE_WR_FAULT = 7,
+ RISCV_IOMMU_FQ_CAUSE_INST_FAULT_S = 12,
+ RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S = 13,
+ RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S = 15,
+ RISCV_IOMMU_FQ_CAUSE_INST_FAULT_VS = 20,
+ RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS = 21,
+ RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS = 23,
+ RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED = 256,
+ RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT = 257,
+ RISCV_IOMMU_FQ_CAUSE_DDT_INVALID = 258,
+ RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED = 259,
+ RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED = 260,
+ RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT = 261,
+ RISCV_IOMMU_FQ_CAUSE_MSI_INVALID = 262,
+ RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED = 263,
+ RISCV_IOMMU_FQ_CAUSE_MRIF_FAULT = 264,
+ RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT = 265,
+ RISCV_IOMMU_FQ_CAUSE_PDT_INVALID = 266,
+ RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED = 267,
+ RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED = 268,
+ RISCV_IOMMU_FQ_CAUSE_PDT_CORRUPTED = 269,
+ RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED = 270,
+ RISCV_IOMMU_FQ_CAUSE_MRIF_CORRUIPTED = 271,
+ RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR = 272,
+ RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT = 273,
+ RISCV_IOMMU_FQ_CAUSE_PT_CORRUPTED = 274
+};
+
+/* MSI page table pointer */
+#define RISCV_IOMMU_DC_MSIPTP_PPN RISCV_IOMMU_ATP_PPN_FIELD
+#define RISCV_IOMMU_DC_MSIPTP_MODE RISCV_IOMMU_ATP_MODE_FIELD
+#define RISCV_IOMMU_DC_MSIPTP_MODE_OFF 0
+#define RISCV_IOMMU_DC_MSIPTP_MODE_FLAT 1
+
+/* 2.2 Process Directory Table */
+#define RISCV_IOMMU_PDTE_VALID BIT_ULL(0)
+#define RISCV_IOMMU_PDTE_PPN RISCV_IOMMU_PPN_FIELD
+
+/* Translation attributes fields */
+#define RISCV_IOMMU_PC_TA_V BIT_ULL(0)
+#define RISCV_IOMMU_PC_TA_RESERVED GENMASK_ULL(63, 32)
+
+/* First stage context fields */
+#define RISCV_IOMMU_PC_FSC_PPN RISCV_IOMMU_ATP_PPN_FIELD
+#define RISCV_IOMMU_PC_FSC_RESERVED GENMASK_ULL(59, 44)
+
+enum riscv_iommu_fq_ttypes {
+ RISCV_IOMMU_FQ_TTYPE_NONE = 0,
+ RISCV_IOMMU_FQ_TTYPE_UADDR_INST_FETCH = 1,
+ RISCV_IOMMU_FQ_TTYPE_UADDR_RD = 2,
+ RISCV_IOMMU_FQ_TTYPE_UADDR_WR = 3,
+ RISCV_IOMMU_FQ_TTYPE_TADDR_INST_FETCH = 5,
+ RISCV_IOMMU_FQ_TTYPE_TADDR_RD = 6,
+ RISCV_IOMMU_FQ_TTYPE_TADDR_WR = 7,
+ RISCV_IOMMU_FQ_TTYPE_PCIE_ATS_REQ = 8,
+ RISCV_IOMMU_FW_TTYPE_PCIE_MSG_REQ = 9,
+};
+
+/*
+ * struct riscv_iommu_msi_pte - MSI Page Table Entry
+ */
+struct riscv_iommu_msi_pte {
+ uint64_t pte;
+ uint64_t mrif_info;
+};
+
+/* Fields on pte */
+#define RISCV_IOMMU_MSI_PTE_V BIT_ULL(0)
+#define RISCV_IOMMU_MSI_PTE_M GENMASK_ULL(2, 1)
+
+#define RISCV_IOMMU_MSI_PTE_M_MRIF 1
+#define RISCV_IOMMU_MSI_PTE_M_BASIC 3
+
+/* When M == 1 (MRIF mode) */
+#define RISCV_IOMMU_MSI_PTE_MRIF_ADDR GENMASK_ULL(53, 7)
+/* When M == 3 (basic mode) */
+#define RISCV_IOMMU_MSI_PTE_PPN RISCV_IOMMU_PPN_FIELD
+#define RISCV_IOMMU_MSI_PTE_C BIT_ULL(63)
+
+/* Fields on mrif_info */
+#define RISCV_IOMMU_MSI_MRIF_NID GENMASK_ULL(9, 0)
+#define RISCV_IOMMU_MSI_MRIF_NPPN RISCV_IOMMU_PPN_FIELD
+#define RISCV_IOMMU_MSI_MRIF_NID_MSB BIT_ULL(60)
+
+#endif /* _RISCV_IOMMU_BITS_H_ */
diff --git a/hw/riscv/riscv-iommu-hpm.c b/hw/riscv/riscv-iommu-hpm.c
new file mode 100644
index 0000000..c5034bf
--- /dev/null
+++ b/hw/riscv/riscv-iommu-hpm.c
@@ -0,0 +1,381 @@
+/*
+ * RISC-V IOMMU - Hardware Performance Monitor (HPM) helpers
+ *
+ * Copyright (C) 2022-2023 Rivos Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/timer.h"
+#include "cpu_bits.h"
+#include "riscv-iommu-hpm.h"
+#include "riscv-iommu.h"
+#include "riscv-iommu-bits.h"
+#include "trace.h"
+
+/* For now we assume IOMMU HPM frequency to be 1GHz so 1-cycle is of 1-ns. */
+static inline uint64_t get_cycles(void)
+{
+ return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+}
+
+uint64_t riscv_iommu_hpmcycle_read(RISCVIOMMUState *s)
+{
+ const uint64_t cycle = riscv_iommu_reg_get64(
+ s, RISCV_IOMMU_REG_IOHPMCYCLES);
+ const uint32_t inhibit = riscv_iommu_reg_get32(
+ s, RISCV_IOMMU_REG_IOCOUNTINH);
+ const uint64_t ctr_prev = s->hpmcycle_prev;
+ const uint64_t ctr_val = s->hpmcycle_val;
+
+ trace_riscv_iommu_hpm_read(cycle, inhibit, ctr_prev, ctr_val);
+
+ if (get_field(inhibit, RISCV_IOMMU_IOCOUNTINH_CY)) {
+ /*
+ * Counter should not increment if inhibit bit is set. We can't really
+ * stop the QEMU_CLOCK_VIRTUAL, so we just return the last updated
+ * counter value to indicate that counter was not incremented.
+ */
+ return (ctr_val & RISCV_IOMMU_IOHPMCYCLES_COUNTER) |
+ (cycle & RISCV_IOMMU_IOHPMCYCLES_OVF);
+ }
+
+ return (ctr_val + get_cycles() - ctr_prev) |
+ (cycle & RISCV_IOMMU_IOHPMCYCLES_OVF);
+}
+
+static void hpm_incr_ctr(RISCVIOMMUState *s, uint32_t ctr_idx)
+{
+ const uint32_t off = ctr_idx << 3;
+ uint64_t cntr_val;
+
+ cntr_val = ldq_le_p(&s->regs_rw[RISCV_IOMMU_REG_IOHPMCTR_BASE + off]);
+ stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_IOHPMCTR_BASE + off], cntr_val + 1);
+
+ trace_riscv_iommu_hpm_incr_ctr(cntr_val);
+
+ /* Handle the overflow scenario. */
+ if (cntr_val == UINT64_MAX) {
+ /*
+ * Generate interrupt only if OF bit is clear. +1 to offset the cycle
+ * register OF bit.
+ */
+ const uint32_t ovf =
+ riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IOCOUNTOVF,
+ BIT(ctr_idx + 1), 0);
+ if (!get_field(ovf, BIT(ctr_idx + 1))) {
+ riscv_iommu_reg_mod64(s,
+ RISCV_IOMMU_REG_IOHPMEVT_BASE + off,
+ RISCV_IOMMU_IOHPMEVT_OF,
+ 0);
+ riscv_iommu_notify(s, RISCV_IOMMU_INTR_PM);
+ }
+ }
+}
+
+void riscv_iommu_hpm_incr_ctr(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
+ unsigned event_id)
+{
+ const uint32_t inhibit = riscv_iommu_reg_get32(
+ s, RISCV_IOMMU_REG_IOCOUNTINH);
+ uint32_t did_gscid;
+ uint32_t pid_pscid;
+ uint32_t ctr_idx;
+ gpointer value;
+ uint32_t ctrs;
+ uint64_t evt;
+
+ if (!(s->cap & RISCV_IOMMU_CAP_HPM)) {
+ return;
+ }
+
+ value = g_hash_table_lookup(s->hpm_event_ctr_map,
+ GUINT_TO_POINTER(event_id));
+ if (value == NULL) {
+ return;
+ }
+
+ for (ctrs = GPOINTER_TO_UINT(value); ctrs != 0; ctrs &= ctrs - 1) {
+ ctr_idx = ctz32(ctrs);
+ if (get_field(inhibit, BIT(ctr_idx + 1))) {
+ continue;
+ }
+
+ evt = riscv_iommu_reg_get64(s,
+ RISCV_IOMMU_REG_IOHPMEVT_BASE + (ctr_idx << 3));
+
+ /*
+ * It's quite possible that event ID has been changed in counter
+ * but hashtable hasn't been updated yet. We don't want to increment
+ * counter for the old event ID.
+ */
+ if (event_id != get_field(evt, RISCV_IOMMU_IOHPMEVT_EVENT_ID)) {
+ continue;
+ }
+
+ if (get_field(evt, RISCV_IOMMU_IOHPMEVT_IDT)) {
+ did_gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID);
+ pid_pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID);
+ } else {
+ did_gscid = ctx->devid;
+ pid_pscid = ctx->process_id;
+ }
+
+ if (get_field(evt, RISCV_IOMMU_IOHPMEVT_PV_PSCV)) {
+ /*
+ * If the transaction does not have a valid process_id, counter
+ * increments if device_id matches DID_GSCID. If the transaction
+ * has a valid process_id, counter increments if device_id
+ * matches DID_GSCID and process_id matches PID_PSCID. See
+ * IOMMU Specification, Chapter 5.23. Performance-monitoring
+ * event selector.
+ */
+ if (ctx->process_id &&
+ get_field(evt, RISCV_IOMMU_IOHPMEVT_PID_PSCID) != pid_pscid) {
+ continue;
+ }
+ }
+
+ if (get_field(evt, RISCV_IOMMU_IOHPMEVT_DV_GSCV)) {
+ uint32_t mask = ~0;
+
+ if (get_field(evt, RISCV_IOMMU_IOHPMEVT_DMASK)) {
+ /*
+ * 1001 1011 mask = GSCID
+ * 0000 0111 mask = mask ^ (mask + 1)
+ * 1111 1000 mask = ~mask;
+ */
+ mask = get_field(evt, RISCV_IOMMU_IOHPMEVT_DID_GSCID);
+ mask = mask ^ (mask + 1);
+ mask = ~mask;
+ }
+
+ if ((get_field(evt, RISCV_IOMMU_IOHPMEVT_DID_GSCID) & mask) !=
+ (did_gscid & mask)) {
+ continue;
+ }
+ }
+
+ hpm_incr_ctr(s, ctr_idx);
+ }
+}
+
+/* Timer callback for cycle counter overflow. */
+void riscv_iommu_hpm_timer_cb(void *priv)
+{
+ RISCVIOMMUState *s = priv;
+ const uint32_t inhibit = riscv_iommu_reg_get32(
+ s, RISCV_IOMMU_REG_IOCOUNTINH);
+ uint32_t ovf;
+
+ if (get_field(inhibit, RISCV_IOMMU_IOCOUNTINH_CY)) {
+ return;
+ }
+
+ if (s->irq_overflow_left > 0) {
+ uint64_t irq_trigger_at =
+ qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + s->irq_overflow_left;
+ timer_mod_anticipate_ns(s->hpm_timer, irq_trigger_at);
+ s->irq_overflow_left = 0;
+ return;
+ }
+
+ ovf = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_IOCOUNTOVF);
+ if (!get_field(ovf, RISCV_IOMMU_IOCOUNTOVF_CY)) {
+ /*
+ * We don't need to set hpmcycle_val to zero and update hpmcycle_prev to
+ * current clock value. The way we calculate iohpmcycs will overflow
+ * and return the correct value. This avoids the need to synchronize
+ * timer callback and write callback.
+ */
+ riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IOCOUNTOVF,
+ RISCV_IOMMU_IOCOUNTOVF_CY, 0);
+ riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_IOHPMCYCLES,
+ RISCV_IOMMU_IOHPMCYCLES_OVF, 0);
+ riscv_iommu_notify(s, RISCV_IOMMU_INTR_PM);
+ }
+}
+
+static void hpm_setup_timer(RISCVIOMMUState *s, uint64_t value)
+{
+ const uint32_t inhibit = riscv_iommu_reg_get32(
+ s, RISCV_IOMMU_REG_IOCOUNTINH);
+ uint64_t overflow_at, overflow_ns;
+
+ if (get_field(inhibit, RISCV_IOMMU_IOCOUNTINH_CY)) {
+ return;
+ }
+
+ /*
+ * We are using INT64_MAX here instead to UINT64_MAX because cycle counter
+ * has 63-bit precision and INT64_MAX is the maximum it can store.
+ */
+ if (value) {
+ overflow_ns = INT64_MAX - value + 1;
+ } else {
+ overflow_ns = INT64_MAX;
+ }
+
+ overflow_at = (uint64_t)qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + overflow_ns;
+
+ if (overflow_at > INT64_MAX) {
+ s->irq_overflow_left = overflow_at - INT64_MAX;
+ overflow_at = INT64_MAX;
+ }
+
+ timer_mod_anticipate_ns(s->hpm_timer, overflow_at);
+}
+
+/* Updates the internal cycle counter state when iocntinh:CY is changed. */
+void riscv_iommu_process_iocntinh_cy(RISCVIOMMUState *s, bool prev_cy_inh)
+{
+ const uint32_t inhibit = riscv_iommu_reg_get32(
+ s, RISCV_IOMMU_REG_IOCOUNTINH);
+
+ /* We only need to process CY bit toggle. */
+ if (!(inhibit ^ prev_cy_inh)) {
+ return;
+ }
+
+ trace_riscv_iommu_hpm_iocntinh_cy(prev_cy_inh);
+
+ if (!(inhibit & RISCV_IOMMU_IOCOUNTINH_CY)) {
+ /*
+ * Cycle counter is enabled. Just start the timer again and update
+ * the clock snapshot value to point to the current time to make
+ * sure iohpmcycles read is correct.
+ */
+ s->hpmcycle_prev = get_cycles();
+ hpm_setup_timer(s, s->hpmcycle_val);
+ } else {
+ /*
+ * Cycle counter is disabled. Stop the timer and update the cycle
+ * counter to record the current value which is last programmed
+ * value + the cycles passed so far.
+ */
+ s->hpmcycle_val = s->hpmcycle_val + (get_cycles() - s->hpmcycle_prev);
+ timer_del(s->hpm_timer);
+ }
+}
+
+void riscv_iommu_process_hpmcycle_write(RISCVIOMMUState *s)
+{
+ const uint64_t val = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_IOHPMCYCLES);
+ const uint32_t ovf = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_IOCOUNTOVF);
+
+ trace_riscv_iommu_hpm_cycle_write(ovf, val);
+
+ /*
+ * Clear OF bit in IOCNTOVF if it's being cleared in IOHPMCYCLES register.
+ */
+ if (get_field(ovf, RISCV_IOMMU_IOCOUNTOVF_CY) &&
+ !get_field(val, RISCV_IOMMU_IOHPMCYCLES_OVF)) {
+ riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IOCOUNTOVF, 0,
+ RISCV_IOMMU_IOCOUNTOVF_CY);
+ }
+
+ s->hpmcycle_val = val & ~RISCV_IOMMU_IOHPMCYCLES_OVF;
+ s->hpmcycle_prev = get_cycles();
+ hpm_setup_timer(s, s->hpmcycle_val);
+}
+
+static inline bool check_valid_event_id(unsigned event_id)
+{
+ return event_id > RISCV_IOMMU_HPMEVENT_INVALID &&
+ event_id < RISCV_IOMMU_HPMEVENT_MAX;
+}
+
+static gboolean hpm_event_equal(gpointer key, gpointer value, gpointer udata)
+{
+ uint32_t *pair = udata;
+
+ if (GPOINTER_TO_UINT(value) & (1 << pair[0])) {
+ pair[1] = GPOINTER_TO_UINT(key);
+ return true;
+ }
+
+ return false;
+}
+
+/* Caller must check ctr_idx against hpm_ctrs to see if its supported or not. */
+static void update_event_map(RISCVIOMMUState *s, uint64_t value,
+ uint32_t ctr_idx)
+{
+ unsigned event_id = get_field(value, RISCV_IOMMU_IOHPMEVT_EVENT_ID);
+ uint32_t pair[2] = { ctr_idx, RISCV_IOMMU_HPMEVENT_INVALID };
+ uint32_t new_value = 1 << ctr_idx;
+ gpointer data;
+
+ /*
+ * If EventID field is RISCV_IOMMU_HPMEVENT_INVALID
+ * remove the current mapping.
+ */
+ if (event_id == RISCV_IOMMU_HPMEVENT_INVALID) {
+ data = g_hash_table_find(s->hpm_event_ctr_map, hpm_event_equal, pair);
+
+ new_value = GPOINTER_TO_UINT(data) & ~(new_value);
+ if (new_value != 0) {
+ g_hash_table_replace(s->hpm_event_ctr_map,
+ GUINT_TO_POINTER(pair[1]),
+ GUINT_TO_POINTER(new_value));
+ } else {
+ g_hash_table_remove(s->hpm_event_ctr_map,
+ GUINT_TO_POINTER(pair[1]));
+ }
+
+ return;
+ }
+
+ /* Update the counter mask if the event is already enabled. */
+ if (g_hash_table_lookup_extended(s->hpm_event_ctr_map,
+ GUINT_TO_POINTER(event_id),
+ NULL,
+ &data)) {
+ new_value |= GPOINTER_TO_UINT(data);
+ }
+
+ g_hash_table_insert(s->hpm_event_ctr_map,
+ GUINT_TO_POINTER(event_id),
+ GUINT_TO_POINTER(new_value));
+}
+
+void riscv_iommu_process_hpmevt_write(RISCVIOMMUState *s, uint32_t evt_reg)
+{
+ const uint32_t ctr_idx = (evt_reg - RISCV_IOMMU_REG_IOHPMEVT_BASE) >> 3;
+ const uint32_t ovf = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_IOCOUNTOVF);
+ uint64_t val = riscv_iommu_reg_get64(s, evt_reg);
+
+ if (ctr_idx >= s->hpm_cntrs) {
+ return;
+ }
+
+ trace_riscv_iommu_hpm_evt_write(ctr_idx, ovf, val);
+
+ /* Clear OF bit in IOCNTOVF if it's being cleared in IOHPMEVT register. */
+ if (get_field(ovf, BIT(ctr_idx + 1)) &&
+ !get_field(val, RISCV_IOMMU_IOHPMEVT_OF)) {
+ /* +1 to offset CYCLE register OF bit. */
+ riscv_iommu_reg_mod32(
+ s, RISCV_IOMMU_REG_IOCOUNTOVF, 0, BIT(ctr_idx + 1));
+ }
+
+ if (!check_valid_event_id(get_field(val, RISCV_IOMMU_IOHPMEVT_EVENT_ID))) {
+ /* Reset EventID (WARL) field to invalid. */
+ val = set_field(val, RISCV_IOMMU_IOHPMEVT_EVENT_ID,
+ RISCV_IOMMU_HPMEVENT_INVALID);
+ riscv_iommu_reg_set64(s, evt_reg, val);
+ }
+
+ update_event_map(s, val, ctr_idx);
+}
diff --git a/hw/riscv/riscv-iommu-hpm.h b/hw/riscv/riscv-iommu-hpm.h
new file mode 100644
index 0000000..5fc4ef2
--- /dev/null
+++ b/hw/riscv/riscv-iommu-hpm.h
@@ -0,0 +1,33 @@
+/*
+ * RISC-V IOMMU - Hardware Performance Monitor (HPM) helpers
+ *
+ * Copyright (C) 2022-2023 Rivos Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_RISCV_IOMMU_HPM_H
+#define HW_RISCV_IOMMU_HPM_H
+
+#include "qom/object.h"
+#include "hw/riscv/riscv-iommu.h"
+
+uint64_t riscv_iommu_hpmcycle_read(RISCVIOMMUState *s);
+void riscv_iommu_hpm_incr_ctr(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
+ unsigned event_id);
+void riscv_iommu_hpm_timer_cb(void *priv);
+void riscv_iommu_process_iocntinh_cy(RISCVIOMMUState *s, bool prev_cy_inh);
+void riscv_iommu_process_hpmcycle_write(RISCVIOMMUState *s);
+void riscv_iommu_process_hpmevt_write(RISCVIOMMUState *s, uint32_t evt_reg);
+
+#endif
diff --git a/hw/riscv/riscv-iommu-pci.c b/hw/riscv/riscv-iommu-pci.c
new file mode 100644
index 0000000..cdb4a7a
--- /dev/null
+++ b/hw/riscv/riscv-iommu-pci.c
@@ -0,0 +1,217 @@
+/*
+ * QEMU emulation of an RISC-V IOMMU
+ *
+ * Copyright (C) 2022-2023 Rivos Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "exec/target_page.h"
+#include "hw/pci/msi.h"
+#include "hw/pci/msix.h"
+#include "hw/pci/pci_bus.h"
+#include "hw/qdev-properties.h"
+#include "hw/riscv/riscv_hart.h"
+#include "migration/vmstate.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/host-utils.h"
+#include "qom/object.h"
+
+#include "cpu_bits.h"
+#include "riscv-iommu.h"
+#include "riscv-iommu-bits.h"
+#include "trace.h"
+
+/* RISC-V IOMMU PCI Device Emulation */
+#define RISCV_PCI_CLASS_SYSTEM_IOMMU 0x0806
+
+/*
+ * 4 MSIx vectors for ICVEC, one for MRIF. The spec mentions in
+ * the "Placement and data flow" section that:
+ *
+ * "The interfaces related to recording an incoming MSI in a memory-resident
+ * interrupt file (MRIF) are implementation-specific. The partitioning of
+ * responsibility between the IOMMU and the IO bridge for recording the
+ * incoming MSI in an MRIF and generating the associated notice MSI are
+ * implementation-specific."
+ *
+ * We're making a design decision to create the MSIx for MRIF in the
+ * IOMMU MSIx emulation.
+ */
+#define RISCV_IOMMU_PCI_MSIX_VECTORS 5
+
+/*
+ * 4 vectors that can be used by civ, fiv, pmiv and piv. Number of
+ * vectors is represented by 2^N, where N = number of writable bits
+ * in each cause. For 4 vectors we'll write 0b11 (3) in each reg.
+ */
+#define RISCV_IOMMU_PCI_ICVEC_VECTORS 0x3333
+
+typedef struct RISCVIOMMUStatePci {
+ PCIDevice pci; /* Parent PCIe device state */
+ uint16_t vendor_id;
+ uint16_t device_id;
+ uint8_t revision;
+ MemoryRegion bar0; /* PCI BAR (including MSI-x config) */
+ RISCVIOMMUState iommu; /* common IOMMU state */
+} RISCVIOMMUStatePci;
+
+/* interrupt delivery callback */
+static void riscv_iommu_pci_notify(RISCVIOMMUState *iommu, unsigned vector)
+{
+ RISCVIOMMUStatePci *s = container_of(iommu, RISCVIOMMUStatePci, iommu);
+
+ if (msix_enabled(&(s->pci))) {
+ msix_notify(&(s->pci), vector);
+ }
+}
+
+static void riscv_iommu_pci_realize(PCIDevice *dev, Error **errp)
+{
+ RISCVIOMMUStatePci *s = DO_UPCAST(RISCVIOMMUStatePci, pci, dev);
+ RISCVIOMMUState *iommu = &s->iommu;
+ uint8_t *pci_conf = dev->config;
+ Error *err = NULL;
+
+ pci_set_word(pci_conf + PCI_VENDOR_ID, s->vendor_id);
+ pci_set_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID, s->vendor_id);
+ pci_set_word(pci_conf + PCI_DEVICE_ID, s->device_id);
+ pci_set_word(pci_conf + PCI_SUBSYSTEM_ID, s->device_id);
+ pci_set_byte(pci_conf + PCI_REVISION_ID, s->revision);
+
+ /* Set device id for trace / debug */
+ DEVICE(iommu)->id = g_strdup_printf("%02x:%02x.%01x",
+ pci_dev_bus_num(dev), PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
+ qdev_realize(DEVICE(iommu), NULL, errp);
+
+ memory_region_init(&s->bar0, OBJECT(s), "riscv-iommu-bar0",
+ QEMU_ALIGN_UP(memory_region_size(&iommu->regs_mr), TARGET_PAGE_SIZE));
+ memory_region_add_subregion(&s->bar0, 0, &iommu->regs_mr);
+
+ pcie_endpoint_cap_init(dev, 0);
+
+ pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
+ PCI_BASE_ADDRESS_MEM_TYPE_64, &s->bar0);
+
+ int ret = msix_init(dev, RISCV_IOMMU_PCI_MSIX_VECTORS,
+ &s->bar0, 0, RISCV_IOMMU_REG_MSI_CONFIG,
+ &s->bar0, 0, RISCV_IOMMU_REG_MSI_CONFIG + 256, 0, &err);
+
+ if (ret == -ENOTSUP) {
+ /*
+ * MSI-x is not supported by the platform.
+ * Driver should use timer/polling based notification handlers.
+ */
+ warn_report_err(err);
+ } else if (ret < 0) {
+ error_propagate(errp, err);
+ return;
+ } else {
+ /* Mark all ICVEC MSIx vectors as used */
+ for (int i = 0; i < RISCV_IOMMU_PCI_MSIX_VECTORS; i++) {
+ msix_vector_use(dev, i);
+ }
+
+ iommu->notify = riscv_iommu_pci_notify;
+ }
+
+ PCIBus *bus = pci_device_root_bus(dev);
+ if (!bus) {
+ error_setg(errp, "can't find PCIe root port for %02x:%02x.%x",
+ pci_bus_num(pci_get_bus(dev)), PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn));
+ return;
+ }
+
+ riscv_iommu_pci_setup_iommu(iommu, bus, errp);
+}
+
+static void riscv_iommu_pci_exit(PCIDevice *pci_dev)
+{
+ pci_setup_iommu(pci_device_root_bus(pci_dev), NULL, NULL);
+}
+
+static const VMStateDescription riscv_iommu_vmstate = {
+ .name = "riscv-iommu",
+ .unmigratable = 1
+};
+
+static void riscv_iommu_pci_init(Object *obj)
+{
+ RISCVIOMMUStatePci *s = RISCV_IOMMU_PCI(obj);
+ RISCVIOMMUState *iommu = &s->iommu;
+
+ object_initialize_child(obj, "iommu", iommu, TYPE_RISCV_IOMMU);
+ qdev_alias_all_properties(DEVICE(iommu), obj);
+
+ iommu->icvec_avail_vectors = RISCV_IOMMU_PCI_ICVEC_VECTORS;
+ riscv_iommu_set_cap_igs(iommu, RISCV_IOMMU_CAP_IGS_MSI);
+}
+
+static const Property riscv_iommu_pci_properties[] = {
+ DEFINE_PROP_UINT16("vendor-id", RISCVIOMMUStatePci, vendor_id,
+ PCI_VENDOR_ID_REDHAT),
+ DEFINE_PROP_UINT16("device-id", RISCVIOMMUStatePci, device_id,
+ PCI_DEVICE_ID_REDHAT_RISCV_IOMMU),
+ DEFINE_PROP_UINT8("revision", RISCVIOMMUStatePci, revision, 0x01),
+};
+
+static void riscv_iommu_pci_reset_hold(Object *obj, ResetType type)
+{
+ RISCVIOMMUStatePci *pci = RISCV_IOMMU_PCI(obj);
+ RISCVIOMMUState *iommu = &pci->iommu;
+
+ riscv_iommu_reset(iommu);
+
+ trace_riscv_iommu_pci_reset_hold(type);
+}
+
+static void riscv_iommu_pci_class_init(ObjectClass *klass, const void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+ ResettableClass *rc = RESETTABLE_CLASS(klass);
+
+ rc->phases.hold = riscv_iommu_pci_reset_hold;
+
+ k->realize = riscv_iommu_pci_realize;
+ k->exit = riscv_iommu_pci_exit;
+ k->class_id = RISCV_PCI_CLASS_SYSTEM_IOMMU;
+ dc->desc = "RISCV-IOMMU DMA Remapping device";
+ dc->vmsd = &riscv_iommu_vmstate;
+ dc->hotpluggable = false;
+ dc->user_creatable = true;
+ set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+ device_class_set_props(dc, riscv_iommu_pci_properties);
+}
+
+static const TypeInfo riscv_iommu_pci = {
+ .name = TYPE_RISCV_IOMMU_PCI,
+ .parent = TYPE_PCI_DEVICE,
+ .class_init = riscv_iommu_pci_class_init,
+ .instance_init = riscv_iommu_pci_init,
+ .instance_size = sizeof(RISCVIOMMUStatePci),
+ .interfaces = (const InterfaceInfo[]) {
+ { INTERFACE_PCIE_DEVICE },
+ { },
+ },
+};
+
+static void riscv_iommu_register_pci_types(void)
+{
+ type_register_static(&riscv_iommu_pci);
+}
+
+type_init(riscv_iommu_register_pci_types);
diff --git a/hw/riscv/riscv-iommu-sys.c b/hw/riscv/riscv-iommu-sys.c
new file mode 100644
index 0000000..e34d00a
--- /dev/null
+++ b/hw/riscv/riscv-iommu-sys.c
@@ -0,0 +1,248 @@
+/*
+ * QEMU emulation of an RISC-V IOMMU Platform Device
+ *
+ * Copyright (C) 2022-2023 Rivos Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/irq.h"
+#include "hw/pci/pci_bus.h"
+#include "hw/qdev-properties.h"
+#include "hw/sysbus.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/host-utils.h"
+#include "qemu/module.h"
+#include "qom/object.h"
+#include "trace.h"
+
+#include "riscv-iommu.h"
+
+#define RISCV_IOMMU_SYSDEV_ICVEC_VECTORS 0x3333
+
+#define RISCV_IOMMU_PCI_MSIX_VECTORS 5
+
+/* RISC-V IOMMU System Platform Device Emulation */
+
+struct RISCVIOMMUStateSys {
+ SysBusDevice parent;
+ uint64_t addr;
+ uint32_t base_irq;
+ DeviceState *irqchip;
+ RISCVIOMMUState iommu;
+
+ /* Wired int support */
+ qemu_irq irqs[RISCV_IOMMU_INTR_COUNT];
+
+ /* Memory Regions for MSIX table and pending bit entries. */
+ MemoryRegion msix_table_mmio;
+ MemoryRegion msix_pba_mmio;
+ uint8_t *msix_table;
+ uint8_t *msix_pba;
+};
+
+static uint64_t msix_table_mmio_read(void *opaque, hwaddr addr,
+ unsigned size)
+{
+ RISCVIOMMUStateSys *s = opaque;
+
+ g_assert(addr + size <= RISCV_IOMMU_PCI_MSIX_VECTORS * PCI_MSIX_ENTRY_SIZE);
+ return pci_get_long(s->msix_table + addr);
+}
+
+static void msix_table_mmio_write(void *opaque, hwaddr addr,
+ uint64_t val, unsigned size)
+{
+ RISCVIOMMUStateSys *s = opaque;
+
+ g_assert(addr + size <= RISCV_IOMMU_PCI_MSIX_VECTORS * PCI_MSIX_ENTRY_SIZE);
+ pci_set_long(s->msix_table + addr, val);
+}
+
+static const MemoryRegionOps msix_table_mmio_ops = {
+ .read = msix_table_mmio_read,
+ .write = msix_table_mmio_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .valid = {
+ .min_access_size = 4,
+ .max_access_size = 8,
+ },
+ .impl = {
+ .max_access_size = 4,
+ },
+};
+
+static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr,
+ unsigned size)
+{
+ RISCVIOMMUStateSys *s = opaque;
+
+ return pci_get_long(s->msix_pba + addr);
+}
+
+static void msix_pba_mmio_write(void *opaque, hwaddr addr,
+ uint64_t val, unsigned size)
+{
+}
+
+static const MemoryRegionOps msix_pba_mmio_ops = {
+ .read = msix_pba_mmio_read,
+ .write = msix_pba_mmio_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .valid = {
+ .min_access_size = 4,
+ .max_access_size = 8,
+ },
+ .impl = {
+ .max_access_size = 4,
+ },
+};
+
+static void riscv_iommu_sysdev_init_msi(RISCVIOMMUStateSys *s,
+ uint32_t n_vectors)
+{
+ RISCVIOMMUState *iommu = &s->iommu;
+ uint32_t table_size = n_vectors * PCI_MSIX_ENTRY_SIZE;
+ uint32_t table_offset = RISCV_IOMMU_REG_MSI_CONFIG;
+ uint32_t pba_size = QEMU_ALIGN_UP(n_vectors, 64) / 8;
+ uint32_t pba_offset = RISCV_IOMMU_REG_MSI_CONFIG + 256;
+
+ s->msix_table = g_malloc0(table_size);
+ s->msix_pba = g_malloc0(pba_size);
+
+ memory_region_init_io(&s->msix_table_mmio, OBJECT(s), &msix_table_mmio_ops,
+ s, "msix-table", table_size);
+ memory_region_add_subregion(&iommu->regs_mr, table_offset,
+ &s->msix_table_mmio);
+
+ memory_region_init_io(&s->msix_pba_mmio, OBJECT(s), &msix_pba_mmio_ops, s,
+ "msix-pba", pba_size);
+ memory_region_add_subregion(&iommu->regs_mr, pba_offset,
+ &s->msix_pba_mmio);
+}
+
+static void riscv_iommu_sysdev_send_MSI(RISCVIOMMUStateSys *s,
+ uint32_t vector)
+{
+ uint8_t *table_entry = s->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
+ uint64_t msi_addr = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR);
+ uint32_t msi_data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA);
+ MemTxResult result;
+
+ address_space_stl_le(&address_space_memory, msi_addr,
+ msi_data, MEMTXATTRS_UNSPECIFIED, &result);
+ trace_riscv_iommu_sys_msi_sent(vector, msi_addr, msi_data, result);
+}
+
+static void riscv_iommu_sysdev_notify(RISCVIOMMUState *iommu,
+ unsigned vector)
+{
+ RISCVIOMMUStateSys *s = container_of(iommu, RISCVIOMMUStateSys, iommu);
+ uint32_t fctl = riscv_iommu_reg_get32(iommu, RISCV_IOMMU_REG_FCTL);
+
+ if (fctl & RISCV_IOMMU_FCTL_WSI) {
+ qemu_irq_pulse(s->irqs[vector]);
+ trace_riscv_iommu_sys_irq_sent(vector);
+ return;
+ }
+
+ riscv_iommu_sysdev_send_MSI(s, vector);
+}
+
+static void riscv_iommu_sys_realize(DeviceState *dev, Error **errp)
+{
+ RISCVIOMMUStateSys *s = RISCV_IOMMU_SYS(dev);
+ SysBusDevice *sysdev = SYS_BUS_DEVICE(s);
+ PCIBus *pci_bus;
+ qemu_irq irq;
+
+ qdev_realize(DEVICE(&s->iommu), NULL, errp);
+ sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->iommu.regs_mr);
+ if (s->addr) {
+ sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, s->addr);
+ }
+
+ pci_bus = (PCIBus *) object_resolve_path_type("", TYPE_PCI_BUS, NULL);
+ if (pci_bus) {
+ riscv_iommu_pci_setup_iommu(&s->iommu, pci_bus, errp);
+ }
+
+ s->iommu.notify = riscv_iommu_sysdev_notify;
+
+ /* 4 IRQs are defined starting from s->base_irq */
+ for (int i = 0; i < RISCV_IOMMU_INTR_COUNT; i++) {
+ sysbus_init_irq(sysdev, &s->irqs[i]);
+ irq = qdev_get_gpio_in(s->irqchip, s->base_irq + i);
+ sysbus_connect_irq(sysdev, i, irq);
+ }
+
+ riscv_iommu_sysdev_init_msi(s, RISCV_IOMMU_PCI_MSIX_VECTORS);
+}
+
+static void riscv_iommu_sys_init(Object *obj)
+{
+ RISCVIOMMUStateSys *s = RISCV_IOMMU_SYS(obj);
+ RISCVIOMMUState *iommu = &s->iommu;
+
+ object_initialize_child(obj, "iommu", iommu, TYPE_RISCV_IOMMU);
+ qdev_alias_all_properties(DEVICE(iommu), obj);
+
+ iommu->icvec_avail_vectors = RISCV_IOMMU_SYSDEV_ICVEC_VECTORS;
+ riscv_iommu_set_cap_igs(iommu, RISCV_IOMMU_CAP_IGS_BOTH);
+}
+
+static const Property riscv_iommu_sys_properties[] = {
+ DEFINE_PROP_UINT64("addr", RISCVIOMMUStateSys, addr, 0),
+ DEFINE_PROP_UINT32("base-irq", RISCVIOMMUStateSys, base_irq, 0),
+ DEFINE_PROP_LINK("irqchip", RISCVIOMMUStateSys, irqchip,
+ TYPE_DEVICE, DeviceState *),
+};
+
+static void riscv_iommu_sys_reset_hold(Object *obj, ResetType type)
+{
+ RISCVIOMMUStateSys *sys = RISCV_IOMMU_SYS(obj);
+ RISCVIOMMUState *iommu = &sys->iommu;
+
+ riscv_iommu_reset(iommu);
+
+ trace_riscv_iommu_sys_reset_hold(type);
+}
+
+static void riscv_iommu_sys_class_init(ObjectClass *klass, const void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ ResettableClass *rc = RESETTABLE_CLASS(klass);
+
+ rc->phases.hold = riscv_iommu_sys_reset_hold;
+
+ dc->realize = riscv_iommu_sys_realize;
+ set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+ device_class_set_props(dc, riscv_iommu_sys_properties);
+}
+
+static const TypeInfo riscv_iommu_sys = {
+ .name = TYPE_RISCV_IOMMU_SYS,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .class_init = riscv_iommu_sys_class_init,
+ .instance_init = riscv_iommu_sys_init,
+ .instance_size = sizeof(RISCVIOMMUStateSys),
+};
+
+static void riscv_iommu_register_sys(void)
+{
+ type_register_static(&riscv_iommu_sys);
+}
+
+type_init(riscv_iommu_register_sys)
diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c
new file mode 100644
index 0000000..a877e5d
--- /dev/null
+++ b/hw/riscv/riscv-iommu.c
@@ -0,0 +1,2679 @@
+/*
+ * QEMU emulation of an RISC-V IOMMU
+ *
+ * Copyright (C) 2021-2023, Rivos Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qom/object.h"
+#include "exec/target_page.h"
+#include "hw/pci/pci_bus.h"
+#include "hw/pci/pci_device.h"
+#include "hw/qdev-properties.h"
+#include "hw/riscv/riscv_hart.h"
+#include "migration/vmstate.h"
+#include "qapi/error.h"
+#include "qemu/timer.h"
+
+#include "cpu_bits.h"
+#include "riscv-iommu.h"
+#include "riscv-iommu-bits.h"
+#include "riscv-iommu-hpm.h"
+#include "trace.h"
+
+#define LIMIT_CACHE_CTX (1U << 7)
+#define LIMIT_CACHE_IOT (1U << 20)
+
+/* Physical page number coversions */
+#define PPN_PHYS(ppn) ((ppn) << TARGET_PAGE_BITS)
+#define PPN_DOWN(phy) ((phy) >> TARGET_PAGE_BITS)
+
+typedef struct RISCVIOMMUEntry RISCVIOMMUEntry;
+
+/* Device assigned I/O address space */
+struct RISCVIOMMUSpace {
+ IOMMUMemoryRegion iova_mr; /* IOVA memory region for attached device */
+ AddressSpace iova_as; /* IOVA address space for attached device */
+ RISCVIOMMUState *iommu; /* Managing IOMMU device state */
+ uint32_t devid; /* Requester identifier, AKA device_id */
+ bool notifier; /* IOMMU unmap notifier enabled */
+ QLIST_ENTRY(RISCVIOMMUSpace) list;
+};
+
+typedef enum RISCVIOMMUTransTag {
+ RISCV_IOMMU_TRANS_TAG_BY, /* Bypass */
+ RISCV_IOMMU_TRANS_TAG_SS, /* Single Stage */
+ RISCV_IOMMU_TRANS_TAG_VG, /* G-stage only */
+ RISCV_IOMMU_TRANS_TAG_VN, /* Nested translation */
+} RISCVIOMMUTransTag;
+
+/* Address translation cache entry */
+struct RISCVIOMMUEntry {
+ RISCVIOMMUTransTag tag; /* Translation Tag */
+ uint64_t iova:44; /* IOVA Page Number */
+ uint64_t pscid:20; /* Process Soft-Context identifier */
+ uint64_t phys:44; /* Physical Page Number */
+ uint64_t gscid:16; /* Guest Soft-Context identifier */
+ uint64_t perm:2; /* IOMMU_RW flags */
+};
+
+/* IOMMU index for transactions without process_id specified. */
+#define RISCV_IOMMU_NOPROCID 0
+
+static uint8_t riscv_iommu_get_icvec_vector(uint32_t icvec, uint32_t vec_type)
+{
+ switch (vec_type) {
+ case RISCV_IOMMU_INTR_CQ:
+ return icvec & RISCV_IOMMU_ICVEC_CIV;
+ case RISCV_IOMMU_INTR_FQ:
+ return (icvec & RISCV_IOMMU_ICVEC_FIV) >> 4;
+ case RISCV_IOMMU_INTR_PM:
+ return (icvec & RISCV_IOMMU_ICVEC_PMIV) >> 8;
+ case RISCV_IOMMU_INTR_PQ:
+ return (icvec & RISCV_IOMMU_ICVEC_PIV) >> 12;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+void riscv_iommu_notify(RISCVIOMMUState *s, int vec_type)
+{
+ uint32_t ipsr, icvec, vector;
+
+ if (!s->notify) {
+ return;
+ }
+
+ icvec = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_ICVEC);
+ ipsr = riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, (1 << vec_type), 0);
+
+ if (!(ipsr & (1 << vec_type))) {
+ vector = riscv_iommu_get_icvec_vector(icvec, vec_type);
+ s->notify(s, vector);
+ trace_riscv_iommu_notify_int_vector(vec_type, vector);
+ }
+}
+
+static void riscv_iommu_fault(RISCVIOMMUState *s,
+ struct riscv_iommu_fq_record *ev)
+{
+ uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
+ uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQH) & s->fq_mask;
+ uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQT) & s->fq_mask;
+ uint32_t next = (tail + 1) & s->fq_mask;
+ uint32_t devid = get_field(ev->hdr, RISCV_IOMMU_FQ_HDR_DID);
+
+ trace_riscv_iommu_flt(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid),
+ PCI_FUNC(devid), ev->hdr, ev->iotval);
+
+ if (!(ctrl & RISCV_IOMMU_FQCSR_FQON) ||
+ !!(ctrl & (RISCV_IOMMU_FQCSR_FQOF | RISCV_IOMMU_FQCSR_FQMF))) {
+ return;
+ }
+
+ if (head == next) {
+ riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR,
+ RISCV_IOMMU_FQCSR_FQOF, 0);
+ } else {
+ dma_addr_t addr = s->fq_addr + tail * sizeof(*ev);
+ if (dma_memory_write(s->target_as, addr, ev, sizeof(*ev),
+ MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
+ riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR,
+ RISCV_IOMMU_FQCSR_FQMF, 0);
+ } else {
+ riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_FQT, next);
+ }
+ }
+
+ if (ctrl & RISCV_IOMMU_FQCSR_FIE) {
+ riscv_iommu_notify(s, RISCV_IOMMU_INTR_FQ);
+ }
+}
+
+static void riscv_iommu_pri(RISCVIOMMUState *s,
+ struct riscv_iommu_pq_record *pr)
+{
+ uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
+ uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQH) & s->pq_mask;
+ uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQT) & s->pq_mask;
+ uint32_t next = (tail + 1) & s->pq_mask;
+ uint32_t devid = get_field(pr->hdr, RISCV_IOMMU_PREQ_HDR_DID);
+
+ trace_riscv_iommu_pri(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid),
+ PCI_FUNC(devid), pr->payload);
+
+ if (!(ctrl & RISCV_IOMMU_PQCSR_PQON) ||
+ !!(ctrl & (RISCV_IOMMU_PQCSR_PQOF | RISCV_IOMMU_PQCSR_PQMF))) {
+ return;
+ }
+
+ if (head == next) {
+ riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR,
+ RISCV_IOMMU_PQCSR_PQOF, 0);
+ } else {
+ dma_addr_t addr = s->pq_addr + tail * sizeof(*pr);
+ if (dma_memory_write(s->target_as, addr, pr, sizeof(*pr),
+ MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
+ riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR,
+ RISCV_IOMMU_PQCSR_PQMF, 0);
+ } else {
+ riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_PQT, next);
+ }
+ }
+
+ if (ctrl & RISCV_IOMMU_PQCSR_PIE) {
+ riscv_iommu_notify(s, RISCV_IOMMU_INTR_PQ);
+ }
+}
+
+/*
+ * Discards all bits from 'val' whose matching bits in the same
+ * positions in the mask 'ext' are zeros, and packs the remaining
+ * bits from 'val' contiguously at the least-significant end of the
+ * result, keeping the same bit order as 'val' and filling any
+ * other bits at the most-significant end of the result with zeros.
+ *
+ * For example, for the following 'val' and 'ext', the return 'ret'
+ * will be:
+ *
+ * val = a b c d e f g h
+ * ext = 1 0 1 0 0 1 1 0
+ * ret = 0 0 0 0 a c f g
+ *
+ * This function, taken from the riscv-iommu 1.0 spec, section 2.3.3
+ * "Process to translate addresses of MSIs", is similar to bit manip
+ * function PEXT (Parallel bits extract) from x86.
+ */
+static uint64_t riscv_iommu_pext_u64(uint64_t val, uint64_t ext)
+{
+ uint64_t ret = 0;
+ uint64_t rot = 1;
+
+ while (ext) {
+ if (ext & 1) {
+ if (val & 1) {
+ ret |= rot;
+ }
+ rot <<= 1;
+ }
+ val >>= 1;
+ ext >>= 1;
+ }
+
+ return ret;
+}
+
+/* Check if GPA matches MSI/MRIF pattern. */
+static bool riscv_iommu_msi_check(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
+ dma_addr_t gpa)
+{
+ if (!s->enable_msi) {
+ return false;
+ }
+
+ if (get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE) !=
+ RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) {
+ return false; /* Invalid MSI/MRIF mode */
+ }
+
+ if ((PPN_DOWN(gpa) ^ ctx->msi_addr_pattern) & ~ctx->msi_addr_mask) {
+ return false; /* GPA not in MSI range defined by AIA IMSIC rules. */
+ }
+
+ return true;
+}
+
+/*
+ * RISCV IOMMU Address Translation Lookup - Page Table Walk
+ *
+ * Note: Code is based on get_physical_address() from target/riscv/cpu_helper.c
+ * Both implementation can be merged into single helper function in future.
+ * Keeping them separate for now, as error reporting and flow specifics are
+ * sufficiently different for separate implementation.
+ *
+ * @s : IOMMU Device State
+ * @ctx : Translation context for device id and process address space id.
+ * @iotlb : translation data: physical address and access mode.
+ * @return : success or fault cause code.
+ */
+static int riscv_iommu_spa_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
+ IOMMUTLBEntry *iotlb)
+{
+ dma_addr_t addr, base;
+ uint64_t satp, gatp, pte;
+ bool en_s, en_g;
+ struct {
+ unsigned char step;
+ unsigned char levels;
+ unsigned char ptidxbits;
+ unsigned char ptesize;
+ } sc[2];
+ /* Translation stage phase */
+ enum {
+ S_STAGE = 0,
+ G_STAGE = 1,
+ } pass;
+ MemTxResult ret;
+
+ satp = get_field(ctx->satp, RISCV_IOMMU_ATP_MODE_FIELD);
+ gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);
+
+ en_s = satp != RISCV_IOMMU_DC_FSC_MODE_BARE;
+ en_g = gatp != RISCV_IOMMU_DC_IOHGATP_MODE_BARE;
+
+ /*
+ * Early check for MSI address match when IOVA == GPA.
+ * Note that the (!en_s) condition means that the MSI
+ * page table may only be used when guest pages are
+ * mapped using the g-stage page table, whether single-
+ * or two-stage paging is enabled. It's unavoidable though,
+ * because the spec mandates that we do a first-stage
+ * translation before we check the MSI page table, which
+ * means we can't do an early MSI check unless we have
+ * strictly !en_s.
+ */
+ if (!en_s && (iotlb->perm & IOMMU_WO) &&
+ riscv_iommu_msi_check(s, ctx, iotlb->iova)) {
+ iotlb->target_as = &s->trap_as;
+ iotlb->translated_addr = iotlb->iova;
+ iotlb->addr_mask = ~TARGET_PAGE_MASK;
+ return 0;
+ }
+
+ /* Exit early for pass-through mode. */
+ if (!(en_s || en_g)) {
+ iotlb->translated_addr = iotlb->iova;
+ iotlb->addr_mask = ~TARGET_PAGE_MASK;
+ /* Allow R/W in pass-through mode */
+ iotlb->perm = IOMMU_RW;
+ return 0;
+ }
+
+ /* S/G translation parameters. */
+ for (pass = 0; pass < 2; pass++) {
+ uint32_t sv_mode;
+
+ sc[pass].step = 0;
+ if (pass ? (s->fctl & RISCV_IOMMU_FCTL_GXL) :
+ (ctx->tc & RISCV_IOMMU_DC_TC_SXL)) {
+ /* 32bit mode for GXL/SXL == 1 */
+ switch (pass ? gatp : satp) {
+ case RISCV_IOMMU_DC_IOHGATP_MODE_BARE:
+ sc[pass].levels = 0;
+ sc[pass].ptidxbits = 0;
+ sc[pass].ptesize = 0;
+ break;
+ case RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4:
+ sv_mode = pass ? RISCV_IOMMU_CAP_SV32X4 : RISCV_IOMMU_CAP_SV32;
+ if (!(s->cap & sv_mode)) {
+ return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
+ }
+ sc[pass].levels = 2;
+ sc[pass].ptidxbits = 10;
+ sc[pass].ptesize = 4;
+ break;
+ default:
+ return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
+ }
+ } else {
+ /* 64bit mode for GXL/SXL == 0 */
+ switch (pass ? gatp : satp) {
+ case RISCV_IOMMU_DC_IOHGATP_MODE_BARE:
+ sc[pass].levels = 0;
+ sc[pass].ptidxbits = 0;
+ sc[pass].ptesize = 0;
+ break;
+ case RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4:
+ sv_mode = pass ? RISCV_IOMMU_CAP_SV39X4 : RISCV_IOMMU_CAP_SV39;
+ if (!(s->cap & sv_mode)) {
+ return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
+ }
+ sc[pass].levels = 3;
+ sc[pass].ptidxbits = 9;
+ sc[pass].ptesize = 8;
+ break;
+ case RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4:
+ sv_mode = pass ? RISCV_IOMMU_CAP_SV48X4 : RISCV_IOMMU_CAP_SV48;
+ if (!(s->cap & sv_mode)) {
+ return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
+ }
+ sc[pass].levels = 4;
+ sc[pass].ptidxbits = 9;
+ sc[pass].ptesize = 8;
+ break;
+ case RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4:
+ sv_mode = pass ? RISCV_IOMMU_CAP_SV57X4 : RISCV_IOMMU_CAP_SV57;
+ if (!(s->cap & sv_mode)) {
+ return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
+ }
+ sc[pass].levels = 5;
+ sc[pass].ptidxbits = 9;
+ sc[pass].ptesize = 8;
+ break;
+ default:
+ return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
+ }
+ }
+ };
+
+ /* S/G stages translation tables root pointers */
+ gatp = PPN_PHYS(get_field(ctx->gatp, RISCV_IOMMU_ATP_PPN_FIELD));
+ satp = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_ATP_PPN_FIELD));
+ addr = (en_s && en_g) ? satp : iotlb->iova;
+ base = en_g ? gatp : satp;
+ pass = en_g ? G_STAGE : S_STAGE;
+
+ do {
+ const unsigned widened = (pass && !sc[pass].step) ? 2 : 0;
+ const unsigned va_bits = widened + sc[pass].ptidxbits;
+ const unsigned va_skip = TARGET_PAGE_BITS + sc[pass].ptidxbits *
+ (sc[pass].levels - 1 - sc[pass].step);
+ const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1);
+ const dma_addr_t pte_addr = base + idx * sc[pass].ptesize;
+ const bool ade =
+ ctx->tc & (pass ? RISCV_IOMMU_DC_TC_GADE : RISCV_IOMMU_DC_TC_SADE);
+
+ /* Address range check before first level lookup */
+ if (!sc[pass].step) {
+ const uint64_t va_len = va_skip + va_bits;
+ const uint64_t va_mask = (1ULL << va_len) - 1;
+
+ if (pass == S_STAGE && va_len > 32) {
+ target_ulong mask, masked_msbs;
+
+ mask = (1L << (TARGET_LONG_BITS - (va_len - 1))) - 1;
+ masked_msbs = (addr >> (va_len - 1)) & mask;
+
+ if (masked_msbs != 0 && masked_msbs != mask) {
+ return (iotlb->perm & IOMMU_WO) ?
+ RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S :
+ RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S;
+ }
+ } else {
+ if ((addr & va_mask) != addr) {
+ return (iotlb->perm & IOMMU_WO) ?
+ RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS :
+ RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS;
+ }
+ }
+ }
+
+
+ if (pass == S_STAGE) {
+ riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_S_VS_WALKS);
+ } else {
+ riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_G_WALKS);
+ }
+
+ /* Read page table entry */
+ if (sc[pass].ptesize == 4) {
+ uint32_t pte32 = 0;
+ ret = ldl_le_dma(s->target_as, pte_addr, &pte32,
+ MEMTXATTRS_UNSPECIFIED);
+ pte = pte32;
+ } else {
+ ret = ldq_le_dma(s->target_as, pte_addr, &pte,
+ MEMTXATTRS_UNSPECIFIED);
+ }
+ if (ret != MEMTX_OK) {
+ return (iotlb->perm & IOMMU_WO) ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT
+ : RISCV_IOMMU_FQ_CAUSE_RD_FAULT;
+ }
+
+ sc[pass].step++;
+ hwaddr ppn = pte >> PTE_PPN_SHIFT;
+
+ if (!(pte & PTE_V)) {
+ break; /* Invalid PTE */
+ } else if (!(pte & (PTE_R | PTE_W | PTE_X))) {
+ base = PPN_PHYS(ppn); /* Inner PTE, continue walking */
+ } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) {
+ break; /* Reserved leaf PTE flags: PTE_W */
+ } else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) {
+ break; /* Reserved leaf PTE flags: PTE_W + PTE_X */
+ } else if (ppn & ((1ULL << (va_skip - TARGET_PAGE_BITS)) - 1)) {
+ break; /* Misaligned PPN */
+ } else if ((iotlb->perm & IOMMU_RO) && !(pte & PTE_R)) {
+ break; /* Read access check failed */
+ } else if ((iotlb->perm & IOMMU_WO) && !(pte & PTE_W)) {
+ break; /* Write access check failed */
+ } else if ((iotlb->perm & IOMMU_RO) && !ade && !(pte & PTE_A)) {
+ break; /* Access bit not set */
+ } else if ((iotlb->perm & IOMMU_WO) && !ade && !(pte & PTE_D)) {
+ break; /* Dirty bit not set */
+ } else {
+ /* Leaf PTE, translation completed. */
+ sc[pass].step = sc[pass].levels;
+ base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1));
+ /* Update address mask based on smallest translation granularity */
+ iotlb->addr_mask &= (1ULL << va_skip) - 1;
+ /* Continue with S-Stage translation? */
+ if (pass && sc[0].step != sc[0].levels) {
+ pass = S_STAGE;
+ addr = iotlb->iova;
+ continue;
+ }
+ /* Translation phase completed (GPA or SPA) */
+ iotlb->translated_addr = base;
+ iotlb->perm = (pte & PTE_W) ? ((pte & PTE_R) ? IOMMU_RW : IOMMU_WO)
+ : IOMMU_RO;
+
+ /* Check MSI GPA address match */
+ if (pass == S_STAGE && (iotlb->perm & IOMMU_WO) &&
+ riscv_iommu_msi_check(s, ctx, base)) {
+ /* Trap MSI writes and return GPA address. */
+ iotlb->target_as = &s->trap_as;
+ iotlb->addr_mask = ~TARGET_PAGE_MASK;
+ return 0;
+ }
+
+ /* Continue with G-Stage translation? */
+ if (!pass && en_g) {
+ pass = G_STAGE;
+ addr = base;
+ base = gatp;
+ sc[pass].step = 0;
+ continue;
+ }
+
+ return 0;
+ }
+
+ if (sc[pass].step == sc[pass].levels) {
+ break; /* Can't find leaf PTE */
+ }
+
+ /* Continue with G-Stage translation? */
+ if (!pass && en_g) {
+ pass = G_STAGE;
+ addr = base;
+ base = gatp;
+ sc[pass].step = 0;
+ }
+ } while (1);
+
+ return (iotlb->perm & IOMMU_WO) ?
+ (pass ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS :
+ RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S) :
+ (pass ? RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS :
+ RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S);
+}
+
+static void riscv_iommu_report_fault(RISCVIOMMUState *s,
+ RISCVIOMMUContext *ctx,
+ uint32_t fault_type, uint32_t cause,
+ bool pv,
+ uint64_t iotval, uint64_t iotval2)
+{
+ struct riscv_iommu_fq_record ev = { 0 };
+
+ if (ctx->tc & RISCV_IOMMU_DC_TC_DTF) {
+ switch (cause) {
+ case RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED:
+ case RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT:
+ case RISCV_IOMMU_FQ_CAUSE_DDT_INVALID:
+ case RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED:
+ case RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED:
+ case RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR:
+ case RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT:
+ break;
+ default:
+ /* DTF prevents reporting a fault for this given cause */
+ return;
+ }
+ }
+
+ ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_CAUSE, cause);
+ ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_TTYPE, fault_type);
+ ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_DID, ctx->devid);
+ ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PV, true);
+
+ if (pv) {
+ ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PID, ctx->process_id);
+ }
+
+ ev.iotval = iotval;
+ ev.iotval2 = iotval2;
+
+ riscv_iommu_fault(s, &ev);
+}
+
+/* Redirect MSI write for given GPA. */
+static MemTxResult riscv_iommu_msi_write(RISCVIOMMUState *s,
+ RISCVIOMMUContext *ctx, uint64_t gpa, uint64_t data,
+ unsigned size, MemTxAttrs attrs)
+{
+ MemTxResult res;
+ dma_addr_t addr;
+ uint64_t intn;
+ uint32_t n190;
+ uint64_t pte[2];
+ int fault_type = RISCV_IOMMU_FQ_TTYPE_UADDR_WR;
+ int cause;
+
+ /* Interrupt File Number */
+ intn = riscv_iommu_pext_u64(PPN_DOWN(gpa), ctx->msi_addr_mask);
+ if (intn >= 256) {
+ /* Interrupt file number out of range */
+ res = MEMTX_ACCESS_ERROR;
+ cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
+ goto err;
+ }
+
+ /* fetch MSI PTE */
+ addr = PPN_PHYS(get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_PPN));
+ addr = addr | (intn * sizeof(pte));
+ res = dma_memory_read(s->target_as, addr, &pte, sizeof(pte),
+ MEMTXATTRS_UNSPECIFIED);
+ if (res != MEMTX_OK) {
+ if (res == MEMTX_DECODE_ERROR) {
+ cause = RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED;
+ } else {
+ cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
+ }
+ goto err;
+ }
+
+ le64_to_cpus(&pte[0]);
+ le64_to_cpus(&pte[1]);
+
+ if (!(pte[0] & RISCV_IOMMU_MSI_PTE_V) || (pte[0] & RISCV_IOMMU_MSI_PTE_C)) {
+ /*
+ * The spec mentions that: "If msipte.C == 1, then further
+ * processing to interpret the PTE is implementation
+ * defined.". We'll abort with cause = 262 for this
+ * case too.
+ */
+ res = MEMTX_ACCESS_ERROR;
+ cause = RISCV_IOMMU_FQ_CAUSE_MSI_INVALID;
+ goto err;
+ }
+
+ switch (get_field(pte[0], RISCV_IOMMU_MSI_PTE_M)) {
+ case RISCV_IOMMU_MSI_PTE_M_BASIC:
+ /* MSI Pass-through mode */
+ addr = PPN_PHYS(get_field(pte[0], RISCV_IOMMU_MSI_PTE_PPN));
+
+ trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid),
+ PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid),
+ gpa, addr);
+
+ res = dma_memory_write(s->target_as, addr, &data, size, attrs);
+ if (res != MEMTX_OK) {
+ cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
+ goto err;
+ }
+
+ return MEMTX_OK;
+ case RISCV_IOMMU_MSI_PTE_M_MRIF:
+ /* MRIF mode, continue. */
+ break;
+ default:
+ res = MEMTX_ACCESS_ERROR;
+ cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED;
+ goto err;
+ }
+
+ /*
+ * Report an error for interrupt identities exceeding the maximum allowed
+ * for an IMSIC interrupt file (2047) or destination address is not 32-bit
+ * aligned. See IOMMU Specification, Chapter 2.3. MSI page tables.
+ */
+ if ((data > 2047) || (gpa & 3)) {
+ res = MEMTX_ACCESS_ERROR;
+ cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED;
+ goto err;
+ }
+
+ /* MSI MRIF mode, non atomic pending bit update */
+
+ /* MRIF pending bit address */
+ addr = get_field(pte[0], RISCV_IOMMU_MSI_PTE_MRIF_ADDR) << 9;
+ addr = addr | ((data & 0x7c0) >> 3);
+
+ trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid),
+ PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid),
+ gpa, addr);
+
+ /* MRIF pending bit mask */
+ data = 1ULL << (data & 0x03f);
+ res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs);
+ if (res != MEMTX_OK) {
+ cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
+ goto err;
+ }
+
+ intn = intn | data;
+ res = dma_memory_write(s->target_as, addr, &intn, sizeof(intn), attrs);
+ if (res != MEMTX_OK) {
+ cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
+ goto err;
+ }
+
+ /* Get MRIF enable bits */
+ addr = addr + sizeof(intn);
+ res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs);
+ if (res != MEMTX_OK) {
+ cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
+ goto err;
+ }
+
+ if (!(intn & data)) {
+ /* notification disabled, MRIF update completed. */
+ return MEMTX_OK;
+ }
+
+ /* Send notification message */
+ addr = PPN_PHYS(get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NPPN));
+ n190 = get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID) |
+ (get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID_MSB) << 10);
+
+ res = dma_memory_write(s->target_as, addr, &n190, sizeof(n190), attrs);
+ if (res != MEMTX_OK) {
+ cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
+ goto err;
+ }
+
+ trace_riscv_iommu_mrif_notification(s->parent_obj.id, n190, addr);
+
+ return MEMTX_OK;
+
+err:
+ riscv_iommu_report_fault(s, ctx, fault_type, cause,
+ !!ctx->process_id, 0, 0);
+ return res;
+}
+
+/*
+ * Check device context configuration as described by the
+ * riscv-iommu spec section "Device-context configuration
+ * checks".
+ */
+static bool riscv_iommu_validate_device_ctx(RISCVIOMMUState *s,
+ RISCVIOMMUContext *ctx)
+{
+ uint32_t fsc_mode, msi_mode;
+ uint64_t gatp;
+
+ if (!(s->cap & RISCV_IOMMU_CAP_ATS) &&
+ (ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS ||
+ ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI ||
+ ctx->tc & RISCV_IOMMU_DC_TC_PRPR)) {
+ return false;
+ }
+
+ if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS) &&
+ (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA ||
+ ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI)) {
+ return false;
+ }
+
+ if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI) &&
+ ctx->tc & RISCV_IOMMU_DC_TC_PRPR) {
+ return false;
+ }
+
+ if (!(s->cap & RISCV_IOMMU_CAP_T2GPA) &&
+ ctx->tc & RISCV_IOMMU_DC_TC_T2GPA) {
+ return false;
+ }
+
+ if (s->cap & RISCV_IOMMU_CAP_MSI_FLAT) {
+ msi_mode = get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE);
+
+ if (msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_OFF &&
+ msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) {
+ return false;
+ }
+ }
+
+ gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);
+ if (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA &&
+ gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) {
+ return false;
+ }
+
+ fsc_mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
+
+ if (ctx->tc & RISCV_IOMMU_DC_TC_PDTV) {
+ switch (fsc_mode) {
+ case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8:
+ if (!(s->cap & RISCV_IOMMU_CAP_PD8)) {
+ return false;
+ }
+ break;
+ case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17:
+ if (!(s->cap & RISCV_IOMMU_CAP_PD17)) {
+ return false;
+ }
+ break;
+ case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20:
+ if (!(s->cap & RISCV_IOMMU_CAP_PD20)) {
+ return false;
+ }
+ break;
+ }
+ } else {
+ /* DC.tc.PDTV is 0 */
+ if (ctx->tc & RISCV_IOMMU_DC_TC_DPE) {
+ return false;
+ }
+
+ if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) {
+ if (fsc_mode == RISCV_IOMMU_CAP_SV32 &&
+ !(s->cap & RISCV_IOMMU_CAP_SV32)) {
+ return false;
+ }
+ } else {
+ switch (fsc_mode) {
+ case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
+ if (!(s->cap & RISCV_IOMMU_CAP_SV39)) {
+ return false;
+ }
+ break;
+ case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
+ if (!(s->cap & RISCV_IOMMU_CAP_SV48)) {
+ return false;
+ }
+ break;
+ case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
+ if (!(s->cap & RISCV_IOMMU_CAP_SV57)) {
+ return false;
+ }
+ break;
+ }
+ }
+ }
+
+ /*
+ * CAP_END is always zero (only one endianess). FCTL_BE is
+ * always zero (little-endian accesses). Thus TC_SBE must
+ * always be LE, i.e. zero.
+ */
+ if (ctx->tc & RISCV_IOMMU_DC_TC_SBE) {
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Validate process context (PC) according to section
+ * "Process-context configuration checks".
+ */
+static bool riscv_iommu_validate_process_ctx(RISCVIOMMUState *s,
+ RISCVIOMMUContext *ctx)
+{
+ uint32_t mode;
+
+ if (get_field(ctx->ta, RISCV_IOMMU_PC_TA_RESERVED)) {
+ return false;
+ }
+
+ if (get_field(ctx->satp, RISCV_IOMMU_PC_FSC_RESERVED)) {
+ return false;
+ }
+
+ mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
+ switch (mode) {
+ case RISCV_IOMMU_DC_FSC_MODE_BARE:
+ /* sv39 and sv32 modes have the same value (8) */
+ case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
+ case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
+ case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
+ break;
+ default:
+ return false;
+ }
+
+ if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) {
+ if (mode == RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV32 &&
+ !(s->cap & RISCV_IOMMU_CAP_SV32)) {
+ return false;
+ }
+ } else {
+ switch (mode) {
+ case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
+ if (!(s->cap & RISCV_IOMMU_CAP_SV39)) {
+ return false;
+ }
+ break;
+ case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
+ if (!(s->cap & RISCV_IOMMU_CAP_SV48)) {
+ return false;
+ }
+ break;
+ case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
+ if (!(s->cap & RISCV_IOMMU_CAP_SV57)) {
+ return false;
+ }
+ break;
+ }
+ }
+
+ return true;
+}
+
+/*
+ * RISC-V IOMMU Device Context Loopkup - Device Directory Tree Walk
+ *
+ * @s : IOMMU Device State
+ * @ctx : Device Translation Context with devid and process_id set.
+ * @return : success or fault code.
+ */
+static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx)
+{
+ const uint64_t ddtp = s->ddtp;
+ unsigned mode = get_field(ddtp, RISCV_IOMMU_DDTP_MODE);
+ dma_addr_t addr = PPN_PHYS(get_field(ddtp, RISCV_IOMMU_DDTP_PPN));
+ struct riscv_iommu_dc dc;
+ /* Device Context format: 0: extended (64 bytes) | 1: base (32 bytes) */
+ const int dc_fmt = !s->enable_msi;
+ const size_t dc_len = sizeof(dc) >> dc_fmt;
+ int depth;
+ uint64_t de;
+
+ switch (mode) {
+ case RISCV_IOMMU_DDTP_MODE_OFF:
+ return RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED;
+
+ case RISCV_IOMMU_DDTP_MODE_BARE:
+ /* mock up pass-through translation context */
+ ctx->gatp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD,
+ RISCV_IOMMU_DC_IOHGATP_MODE_BARE);
+ ctx->satp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD,
+ RISCV_IOMMU_DC_FSC_MODE_BARE);
+
+ ctx->tc = RISCV_IOMMU_DC_TC_V;
+ if (s->enable_ats) {
+ ctx->tc |= RISCV_IOMMU_DC_TC_EN_ATS;
+ }
+
+ ctx->ta = 0;
+ ctx->msiptp = 0;
+ return 0;
+
+ case RISCV_IOMMU_DDTP_MODE_1LVL:
+ depth = 0;
+ break;
+
+ case RISCV_IOMMU_DDTP_MODE_2LVL:
+ depth = 1;
+ break;
+
+ case RISCV_IOMMU_DDTP_MODE_3LVL:
+ depth = 2;
+ break;
+
+ default:
+ return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
+ }
+
+ /*
+ * Check supported device id width (in bits).
+ * See IOMMU Specification, Chapter 6. Software guidelines.
+ * - if extended device-context format is used:
+ * 1LVL: 6, 2LVL: 15, 3LVL: 24
+ * - if base device-context format is used:
+ * 1LVL: 7, 2LVL: 16, 3LVL: 24
+ */
+ if (ctx->devid >= (1 << (depth * 9 + 6 + (dc_fmt && depth != 2)))) {
+ return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
+ }
+
+ /* Device directory tree walk */
+ for (; depth-- > 0; ) {
+ riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_DD_WALK);
+ /*
+ * Select device id index bits based on device directory tree level
+ * and device context format.
+ * See IOMMU Specification, Chapter 2. Data Structures.
+ * - if extended device-context format is used:
+ * device index: [23:15][14:6][5:0]
+ * - if base device-context format is used:
+ * device index: [23:16][15:7][6:0]
+ */
+ const int split = depth * 9 + 6 + dc_fmt;
+ addr |= ((ctx->devid >> split) << 3) & ~TARGET_PAGE_MASK;
+ if (dma_memory_read(s->target_as, addr, &de, sizeof(de),
+ MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
+ return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT;
+ }
+ le64_to_cpus(&de);
+ if (!(de & RISCV_IOMMU_DDTE_VALID)) {
+ /* invalid directory entry */
+ return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
+ }
+ if (de & ~(RISCV_IOMMU_DDTE_PPN | RISCV_IOMMU_DDTE_VALID)) {
+ /* reserved bits set */
+ return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
+ }
+ addr = PPN_PHYS(get_field(de, RISCV_IOMMU_DDTE_PPN));
+ }
+
+ riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_DD_WALK);
+
+ /* index into device context entry page */
+ addr |= (ctx->devid * dc_len) & ~TARGET_PAGE_MASK;
+
+ memset(&dc, 0, sizeof(dc));
+ if (dma_memory_read(s->target_as, addr, &dc, dc_len,
+ MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
+ return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT;
+ }
+
+ /* Set translation context. */
+ ctx->tc = le64_to_cpu(dc.tc);
+ ctx->gatp = le64_to_cpu(dc.iohgatp);
+ ctx->satp = le64_to_cpu(dc.fsc);
+ ctx->ta = le64_to_cpu(dc.ta);
+ ctx->msiptp = le64_to_cpu(dc.msiptp);
+ ctx->msi_addr_mask = le64_to_cpu(dc.msi_addr_mask);
+ ctx->msi_addr_pattern = le64_to_cpu(dc.msi_addr_pattern);
+
+ if (!(ctx->tc & RISCV_IOMMU_DC_TC_V)) {
+ return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
+ }
+
+ if (!riscv_iommu_validate_device_ctx(s, ctx)) {
+ return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
+ }
+
+ /* FSC field checks */
+ mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
+ addr = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_DC_FSC_PPN));
+
+ if (!(ctx->tc & RISCV_IOMMU_DC_TC_PDTV)) {
+ if (ctx->process_id != RISCV_IOMMU_NOPROCID) {
+ /* PID is disabled */
+ return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
+ }
+ if (mode > RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57) {
+ /* Invalid translation mode */
+ return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
+ }
+ return 0;
+ }
+
+ if (ctx->process_id == RISCV_IOMMU_NOPROCID) {
+ if (!(ctx->tc & RISCV_IOMMU_DC_TC_DPE)) {
+ /* No default process_id enabled, set BARE mode */
+ ctx->satp = 0ULL;
+ return 0;
+ } else {
+ /* Use default process_id #0 */
+ ctx->process_id = 0;
+ }
+ }
+
+ if (mode == RISCV_IOMMU_DC_FSC_MODE_BARE) {
+ /* No S-Stage translation, done. */
+ return 0;
+ }
+
+ /* FSC.TC.PDTV enabled */
+ if (mode > RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20) {
+ /* Invalid PDTP.MODE */
+ return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED;
+ }
+
+ for (depth = mode - RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8; depth-- > 0; ) {
+ riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_PD_WALK);
+
+ /*
+ * Select process id index bits based on process directory tree
+ * level. See IOMMU Specification, 2.2. Process-Directory-Table.
+ */
+ const int split = depth * 9 + 8;
+ addr |= ((ctx->process_id >> split) << 3) & ~TARGET_PAGE_MASK;
+ if (dma_memory_read(s->target_as, addr, &de, sizeof(de),
+ MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
+ return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
+ }
+ le64_to_cpus(&de);
+ if (!(de & RISCV_IOMMU_PDTE_VALID)) {
+ return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID;
+ }
+ addr = PPN_PHYS(get_field(de, RISCV_IOMMU_PDTE_PPN));
+ }
+
+ riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_PD_WALK);
+
+ /* Leaf entry in PDT */
+ addr |= (ctx->process_id << 4) & ~TARGET_PAGE_MASK;
+ if (dma_memory_read(s->target_as, addr, &dc.ta, sizeof(uint64_t) * 2,
+ MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
+ return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
+ }
+
+ /* Use FSC and TA from process directory entry. */
+ ctx->ta = le64_to_cpu(dc.ta);
+ ctx->satp = le64_to_cpu(dc.fsc);
+
+ if (!(ctx->ta & RISCV_IOMMU_PC_TA_V)) {
+ return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID;
+ }
+
+ if (!riscv_iommu_validate_process_ctx(s, ctx)) {
+ return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED;
+ }
+
+ return 0;
+}
+
+/* Translation Context cache support */
+static gboolean riscv_iommu_ctx_equal(gconstpointer v1, gconstpointer v2)
+{
+ RISCVIOMMUContext *c1 = (RISCVIOMMUContext *) v1;
+ RISCVIOMMUContext *c2 = (RISCVIOMMUContext *) v2;
+ return c1->devid == c2->devid &&
+ c1->process_id == c2->process_id;
+}
+
+static guint riscv_iommu_ctx_hash(gconstpointer v)
+{
+ RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) v;
+ /*
+ * Generate simple hash of (process_id, devid)
+ * assuming 24-bit wide devid.
+ */
+ return (guint)(ctx->devid) + ((guint)(ctx->process_id) << 24);
+}
+
+static void riscv_iommu_ctx_inval_devid_procid(gpointer key, gpointer value,
+ gpointer data)
+{
+ RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
+ RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data;
+ if (ctx->tc & RISCV_IOMMU_DC_TC_V &&
+ ctx->devid == arg->devid &&
+ ctx->process_id == arg->process_id) {
+ ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
+ }
+}
+
+static void riscv_iommu_ctx_inval_devid(gpointer key, gpointer value,
+ gpointer data)
+{
+ RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
+ RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data;
+ if (ctx->tc & RISCV_IOMMU_DC_TC_V &&
+ ctx->devid == arg->devid) {
+ ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
+ }
+}
+
+static void riscv_iommu_ctx_inval_all(gpointer key, gpointer value,
+ gpointer data)
+{
+ RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
+ if (ctx->tc & RISCV_IOMMU_DC_TC_V) {
+ ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
+ }
+}
+
+static void riscv_iommu_ctx_inval(RISCVIOMMUState *s, GHFunc func,
+ uint32_t devid, uint32_t process_id)
+{
+ GHashTable *ctx_cache;
+ RISCVIOMMUContext key = {
+ .devid = devid,
+ .process_id = process_id,
+ };
+ ctx_cache = g_hash_table_ref(s->ctx_cache);
+ g_hash_table_foreach(ctx_cache, func, &key);
+ g_hash_table_unref(ctx_cache);
+}
+
+/* Find or allocate translation context for a given {device_id, process_id} */
+static RISCVIOMMUContext *riscv_iommu_ctx(RISCVIOMMUState *s,
+ unsigned devid, unsigned process_id,
+ void **ref)
+{
+ GHashTable *ctx_cache;
+ RISCVIOMMUContext *ctx;
+ RISCVIOMMUContext key = {
+ .devid = devid,
+ .process_id = process_id,
+ };
+
+ ctx_cache = g_hash_table_ref(s->ctx_cache);
+ ctx = g_hash_table_lookup(ctx_cache, &key);
+
+ if (ctx && (ctx->tc & RISCV_IOMMU_DC_TC_V)) {
+ *ref = ctx_cache;
+ return ctx;
+ }
+
+ ctx = g_new0(RISCVIOMMUContext, 1);
+ ctx->devid = devid;
+ ctx->process_id = process_id;
+
+ int fault = riscv_iommu_ctx_fetch(s, ctx);
+ if (!fault) {
+ if (g_hash_table_size(ctx_cache) >= LIMIT_CACHE_CTX) {
+ g_hash_table_unref(ctx_cache);
+ ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash,
+ riscv_iommu_ctx_equal,
+ g_free, NULL);
+ g_hash_table_ref(ctx_cache);
+ g_hash_table_unref(qatomic_xchg(&s->ctx_cache, ctx_cache));
+ }
+ g_hash_table_add(ctx_cache, ctx);
+ *ref = ctx_cache;
+ return ctx;
+ }
+
+ g_hash_table_unref(ctx_cache);
+ *ref = NULL;
+
+ riscv_iommu_report_fault(s, ctx, RISCV_IOMMU_FQ_TTYPE_UADDR_RD,
+ fault, !!process_id, 0, 0);
+
+ g_free(ctx);
+ return NULL;
+}
+
+static void riscv_iommu_ctx_put(RISCVIOMMUState *s, void *ref)
+{
+ if (ref) {
+ g_hash_table_unref((GHashTable *)ref);
+ }
+}
+
+/* Find or allocate address space for a given device */
+static AddressSpace *riscv_iommu_space(RISCVIOMMUState *s, uint32_t devid)
+{
+ RISCVIOMMUSpace *as;
+
+ /* FIXME: PCIe bus remapping for attached endpoints. */
+ devid |= s->bus << 8;
+
+ QLIST_FOREACH(as, &s->spaces, list) {
+ if (as->devid == devid) {
+ break;
+ }
+ }
+
+ if (as == NULL) {
+ char name[64];
+ as = g_new0(RISCVIOMMUSpace, 1);
+
+ as->iommu = s;
+ as->devid = devid;
+
+ snprintf(name, sizeof(name), "riscv-iommu-%04x:%02x.%d-iova",
+ PCI_BUS_NUM(as->devid), PCI_SLOT(as->devid), PCI_FUNC(as->devid));
+
+ /* IOVA address space, untranslated addresses */
+ memory_region_init_iommu(&as->iova_mr, sizeof(as->iova_mr),
+ TYPE_RISCV_IOMMU_MEMORY_REGION,
+ OBJECT(as), "riscv_iommu", UINT64_MAX);
+ address_space_init(&as->iova_as, MEMORY_REGION(&as->iova_mr), name);
+
+ QLIST_INSERT_HEAD(&s->spaces, as, list);
+
+ trace_riscv_iommu_new(s->parent_obj.id, PCI_BUS_NUM(as->devid),
+ PCI_SLOT(as->devid), PCI_FUNC(as->devid));
+ }
+ return &as->iova_as;
+}
+
+/* Translation Object cache support */
+static gboolean riscv_iommu_iot_equal(gconstpointer v1, gconstpointer v2)
+{
+ RISCVIOMMUEntry *t1 = (RISCVIOMMUEntry *) v1;
+ RISCVIOMMUEntry *t2 = (RISCVIOMMUEntry *) v2;
+ return t1->gscid == t2->gscid && t1->pscid == t2->pscid &&
+ t1->iova == t2->iova && t1->tag == t2->tag;
+}
+
+static guint riscv_iommu_iot_hash(gconstpointer v)
+{
+ RISCVIOMMUEntry *t = (RISCVIOMMUEntry *) v;
+ return (guint)t->iova;
+}
+
+/* GV: 0 AV: 0 PSCV: 0 GVMA: 0 */
+/* GV: 0 AV: 0 GVMA: 1 */
+static
+void riscv_iommu_iot_inval_all(gpointer key, gpointer value, gpointer data)
+{
+ RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
+ RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
+ if (iot->tag == arg->tag) {
+ iot->perm = IOMMU_NONE;
+ }
+}
+
+/* GV: 0 AV: 0 PSCV: 1 GVMA: 0 */
+static
+void riscv_iommu_iot_inval_pscid(gpointer key, gpointer value, gpointer data)
+{
+ RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
+ RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
+ if (iot->tag == arg->tag &&
+ iot->pscid == arg->pscid) {
+ iot->perm = IOMMU_NONE;
+ }
+}
+
+/* GV: 0 AV: 1 PSCV: 0 GVMA: 0 */
+static
+void riscv_iommu_iot_inval_iova(gpointer key, gpointer value, gpointer data)
+{
+ RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
+ RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
+ if (iot->tag == arg->tag &&
+ iot->iova == arg->iova) {
+ iot->perm = IOMMU_NONE;
+ }
+}
+
+/* GV: 0 AV: 1 PSCV: 1 GVMA: 0 */
+static void riscv_iommu_iot_inval_pscid_iova(gpointer key, gpointer value,
+ gpointer data)
+{
+ RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
+ RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
+ if (iot->tag == arg->tag &&
+ iot->pscid == arg->pscid &&
+ iot->iova == arg->iova) {
+ iot->perm = IOMMU_NONE;
+ }
+}
+
+/* GV: 1 AV: 0 PSCV: 0 GVMA: 0 */
+/* GV: 1 AV: 0 GVMA: 1 */
+static
+void riscv_iommu_iot_inval_gscid(gpointer key, gpointer value, gpointer data)
+{
+ RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
+ RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
+ if (iot->tag == arg->tag &&
+ iot->gscid == arg->gscid) {
+ iot->perm = IOMMU_NONE;
+ }
+}
+
+/* GV: 1 AV: 0 PSCV: 1 GVMA: 0 */
+static void riscv_iommu_iot_inval_gscid_pscid(gpointer key, gpointer value,
+ gpointer data)
+{
+ RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
+ RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
+ if (iot->tag == arg->tag &&
+ iot->gscid == arg->gscid &&
+ iot->pscid == arg->pscid) {
+ iot->perm = IOMMU_NONE;
+ }
+}
+
+/* GV: 1 AV: 1 PSCV: 0 GVMA: 0 */
+/* GV: 1 AV: 1 GVMA: 1 */
+static void riscv_iommu_iot_inval_gscid_iova(gpointer key, gpointer value,
+ gpointer data)
+{
+ RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
+ RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
+ if (iot->tag == arg->tag &&
+ iot->gscid == arg->gscid &&
+ iot->iova == arg->iova) {
+ iot->perm = IOMMU_NONE;
+ }
+}
+
+/* GV: 1 AV: 1 PSCV: 1 GVMA: 0 */
+static void riscv_iommu_iot_inval_gscid_pscid_iova(gpointer key, gpointer value,
+ gpointer data)
+{
+ RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
+ RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
+ if (iot->tag == arg->tag &&
+ iot->gscid == arg->gscid &&
+ iot->pscid == arg->pscid &&
+ iot->iova == arg->iova) {
+ iot->perm = IOMMU_NONE;
+ }
+}
+
+/* caller should keep ref-count for iot_cache object */
+static RISCVIOMMUEntry *riscv_iommu_iot_lookup(RISCVIOMMUContext *ctx,
+ GHashTable *iot_cache, hwaddr iova, RISCVIOMMUTransTag transtag)
+{
+ RISCVIOMMUEntry key = {
+ .tag = transtag,
+ .gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID),
+ .pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID),
+ .iova = PPN_DOWN(iova),
+ };
+ return g_hash_table_lookup(iot_cache, &key);
+}
+
+/* caller should keep ref-count for iot_cache object */
+static void riscv_iommu_iot_update(RISCVIOMMUState *s,
+ GHashTable *iot_cache, RISCVIOMMUEntry *iot)
+{
+ if (!s->iot_limit) {
+ return;
+ }
+
+ if (g_hash_table_size(s->iot_cache) >= s->iot_limit) {
+ iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash,
+ riscv_iommu_iot_equal,
+ g_free, NULL);
+ g_hash_table_unref(qatomic_xchg(&s->iot_cache, iot_cache));
+ }
+ g_hash_table_add(iot_cache, iot);
+}
+
+static void riscv_iommu_iot_inval(RISCVIOMMUState *s, GHFunc func,
+ uint32_t gscid, uint32_t pscid, hwaddr iova, RISCVIOMMUTransTag transtag)
+{
+ GHashTable *iot_cache;
+ RISCVIOMMUEntry key = {
+ .tag = transtag,
+ .gscid = gscid,
+ .pscid = pscid,
+ .iova = PPN_DOWN(iova),
+ };
+
+ iot_cache = g_hash_table_ref(s->iot_cache);
+ g_hash_table_foreach(iot_cache, func, &key);
+ g_hash_table_unref(iot_cache);
+}
+
+static RISCVIOMMUTransTag riscv_iommu_get_transtag(RISCVIOMMUContext *ctx)
+{
+ uint64_t satp = get_field(ctx->satp, RISCV_IOMMU_ATP_MODE_FIELD);
+ uint64_t gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);
+
+ if (satp == RISCV_IOMMU_DC_FSC_MODE_BARE) {
+ return (gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) ?
+ RISCV_IOMMU_TRANS_TAG_BY : RISCV_IOMMU_TRANS_TAG_VG;
+ } else {
+ return (gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) ?
+ RISCV_IOMMU_TRANS_TAG_SS : RISCV_IOMMU_TRANS_TAG_VN;
+ }
+}
+
+static int riscv_iommu_translate(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
+ IOMMUTLBEntry *iotlb, bool enable_cache)
+{
+ RISCVIOMMUTransTag transtag = riscv_iommu_get_transtag(ctx);
+ RISCVIOMMUEntry *iot;
+ IOMMUAccessFlags perm;
+ bool enable_pid;
+ bool enable_pri;
+ GHashTable *iot_cache;
+ int fault;
+
+ riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_URQ);
+
+ iot_cache = g_hash_table_ref(s->iot_cache);
+ /*
+ * TC[32] is reserved for custom extensions, used here to temporarily
+ * enable automatic page-request generation for ATS queries.
+ */
+ enable_pri = (iotlb->perm == IOMMU_NONE) && (ctx->tc & BIT_ULL(32));
+ enable_pid = (ctx->tc & RISCV_IOMMU_DC_TC_PDTV);
+
+ /* Check for ATS request. */
+ if (iotlb->perm == IOMMU_NONE) {
+ riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_ATS_RQ);
+ /* Check if ATS is disabled. */
+ if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS)) {
+ enable_pri = false;
+ fault = RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
+ goto done;
+ }
+ }
+
+ iot = riscv_iommu_iot_lookup(ctx, iot_cache, iotlb->iova, transtag);
+ perm = iot ? iot->perm : IOMMU_NONE;
+ if (perm != IOMMU_NONE) {
+ iotlb->translated_addr = PPN_PHYS(iot->phys);
+ iotlb->addr_mask = ~TARGET_PAGE_MASK;
+ iotlb->perm = perm;
+ fault = 0;
+ goto done;
+ }
+
+ riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_TLB_MISS);
+
+ /* Translate using device directory / page table information. */
+ fault = riscv_iommu_spa_fetch(s, ctx, iotlb);
+
+ if (!fault && iotlb->target_as == &s->trap_as) {
+ /* Do not cache trapped MSI translations */
+ goto done;
+ }
+
+ /*
+ * We made an implementation choice to not cache identity-mapped
+ * translations, as allowed by the specification, to avoid
+ * translation cache evictions for other devices sharing the
+ * IOMMU hardware model.
+ */
+ if (!fault && iotlb->translated_addr != iotlb->iova && enable_cache) {
+ iot = g_new0(RISCVIOMMUEntry, 1);
+ iot->iova = PPN_DOWN(iotlb->iova);
+ iot->phys = PPN_DOWN(iotlb->translated_addr);
+ iot->gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID);
+ iot->pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID);
+ iot->perm = iotlb->perm;
+ iot->tag = transtag;
+ riscv_iommu_iot_update(s, iot_cache, iot);
+ }
+
+done:
+ g_hash_table_unref(iot_cache);
+
+ if (enable_pri && fault) {
+ struct riscv_iommu_pq_record pr = {0};
+ if (enable_pid) {
+ pr.hdr = set_field(RISCV_IOMMU_PREQ_HDR_PV,
+ RISCV_IOMMU_PREQ_HDR_PID, ctx->process_id);
+ }
+ pr.hdr = set_field(pr.hdr, RISCV_IOMMU_PREQ_HDR_DID, ctx->devid);
+ pr.payload = (iotlb->iova & TARGET_PAGE_MASK) |
+ RISCV_IOMMU_PREQ_PAYLOAD_M;
+ riscv_iommu_pri(s, &pr);
+ return fault;
+ }
+
+ if (fault) {
+ unsigned ttype = RISCV_IOMMU_FQ_TTYPE_PCIE_ATS_REQ;
+
+ if (iotlb->perm & IOMMU_RW) {
+ ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_WR;
+ } else if (iotlb->perm & IOMMU_RO) {
+ ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_RD;
+ }
+
+ riscv_iommu_report_fault(s, ctx, ttype, fault, enable_pid,
+ iotlb->iova, iotlb->translated_addr);
+ return fault;
+ }
+
+ return 0;
+}
+
+/* IOMMU Command Interface */
+static MemTxResult riscv_iommu_iofence(RISCVIOMMUState *s, bool notify,
+ uint64_t addr, uint32_t data)
+{
+ /*
+ * ATS processing in this implementation of the IOMMU is synchronous,
+ * no need to wait for completions here.
+ */
+ if (!notify) {
+ return MEMTX_OK;
+ }
+
+ return dma_memory_write(s->target_as, addr, &data, sizeof(data),
+ MEMTXATTRS_UNSPECIFIED);
+}
+
+static void riscv_iommu_ats(RISCVIOMMUState *s,
+ struct riscv_iommu_command *cmd, IOMMUNotifierFlag flag,
+ IOMMUAccessFlags perm,
+ void (*trace_fn)(const char *id))
+{
+ RISCVIOMMUSpace *as = NULL;
+ IOMMUNotifier *n;
+ IOMMUTLBEvent event;
+ uint32_t pid;
+ uint32_t devid;
+ const bool pv = cmd->dword0 & RISCV_IOMMU_CMD_ATS_PV;
+
+ if (cmd->dword0 & RISCV_IOMMU_CMD_ATS_DSV) {
+ /* Use device segment and requester id */
+ devid = get_field(cmd->dword0,
+ RISCV_IOMMU_CMD_ATS_DSEG | RISCV_IOMMU_CMD_ATS_RID);
+ } else {
+ devid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_RID);
+ }
+
+ pid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_PID);
+
+ QLIST_FOREACH(as, &s->spaces, list) {
+ if (as->devid == devid) {
+ break;
+ }
+ }
+
+ if (!as || !as->notifier) {
+ return;
+ }
+
+ event.type = flag;
+ event.entry.perm = perm;
+ event.entry.target_as = s->target_as;
+
+ IOMMU_NOTIFIER_FOREACH(n, &as->iova_mr) {
+ if (!pv || n->iommu_idx == pid) {
+ event.entry.iova = n->start;
+ event.entry.addr_mask = n->end - n->start;
+ trace_fn(as->iova_mr.parent_obj.name);
+ memory_region_notify_iommu_one(n, &event);
+ }
+ }
+}
+
+static void riscv_iommu_ats_inval(RISCVIOMMUState *s,
+ struct riscv_iommu_command *cmd)
+{
+ return riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_DEVIOTLB_UNMAP, IOMMU_NONE,
+ trace_riscv_iommu_ats_inval);
+}
+
+static void riscv_iommu_ats_prgr(RISCVIOMMUState *s,
+ struct riscv_iommu_command *cmd)
+{
+ unsigned resp_code = get_field(cmd->dword1,
+ RISCV_IOMMU_CMD_ATS_PRGR_RESP_CODE);
+
+ /* Using the access flag to carry response code information */
+ IOMMUAccessFlags perm = resp_code ? IOMMU_NONE : IOMMU_RW;
+ return riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_MAP, perm,
+ trace_riscv_iommu_ats_prgr);
+}
+
+static void riscv_iommu_process_ddtp(RISCVIOMMUState *s)
+{
+ uint64_t old_ddtp = s->ddtp;
+ uint64_t new_ddtp = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_DDTP);
+ unsigned new_mode = get_field(new_ddtp, RISCV_IOMMU_DDTP_MODE);
+ unsigned old_mode = get_field(old_ddtp, RISCV_IOMMU_DDTP_MODE);
+ bool ok = false;
+
+ /*
+ * Check for allowed DDTP.MODE transitions:
+ * {OFF, BARE} -> {OFF, BARE, 1LVL, 2LVL, 3LVL}
+ * {1LVL, 2LVL, 3LVL} -> {OFF, BARE}
+ */
+ if (new_mode == old_mode ||
+ new_mode == RISCV_IOMMU_DDTP_MODE_OFF ||
+ new_mode == RISCV_IOMMU_DDTP_MODE_BARE) {
+ ok = true;
+ } else if (new_mode == RISCV_IOMMU_DDTP_MODE_1LVL ||
+ new_mode == RISCV_IOMMU_DDTP_MODE_2LVL ||
+ new_mode == RISCV_IOMMU_DDTP_MODE_3LVL) {
+ ok = old_mode == RISCV_IOMMU_DDTP_MODE_OFF ||
+ old_mode == RISCV_IOMMU_DDTP_MODE_BARE;
+ }
+
+ if (ok) {
+ /* clear reserved and busy bits, report back sanitized version */
+ new_ddtp = set_field(new_ddtp & RISCV_IOMMU_DDTP_PPN,
+ RISCV_IOMMU_DDTP_MODE, new_mode);
+ } else {
+ new_ddtp = old_ddtp;
+ }
+ s->ddtp = new_ddtp;
+
+ riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, new_ddtp);
+}
+
+/* Command function and opcode field. */
+#define RISCV_IOMMU_CMD(func, op) (((func) << 7) | (op))
+
+static void riscv_iommu_process_cq_tail(RISCVIOMMUState *s)
+{
+ struct riscv_iommu_command cmd;
+ MemTxResult res;
+ dma_addr_t addr;
+ uint32_t tail, head, ctrl;
+ uint64_t cmd_opcode;
+ GHFunc func;
+
+ ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
+ tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQT) & s->cq_mask;
+ head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQH) & s->cq_mask;
+
+ /* Check for pending error or queue processing disabled */
+ if (!(ctrl & RISCV_IOMMU_CQCSR_CQON) ||
+ !!(ctrl & (RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CQMF))) {
+ return;
+ }
+
+ while (tail != head) {
+ addr = s->cq_addr + head * sizeof(cmd);
+ res = dma_memory_read(s->target_as, addr, &cmd, sizeof(cmd),
+ MEMTXATTRS_UNSPECIFIED);
+
+ if (res != MEMTX_OK) {
+ riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
+ RISCV_IOMMU_CQCSR_CQMF, 0);
+ goto fault;
+ }
+
+ trace_riscv_iommu_cmd(s->parent_obj.id, cmd.dword0, cmd.dword1);
+
+ cmd_opcode = get_field(cmd.dword0,
+ RISCV_IOMMU_CMD_OPCODE | RISCV_IOMMU_CMD_FUNC);
+
+ switch (cmd_opcode) {
+ case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOFENCE_FUNC_C,
+ RISCV_IOMMU_CMD_IOFENCE_OPCODE):
+ res = riscv_iommu_iofence(s,
+ cmd.dword0 & RISCV_IOMMU_CMD_IOFENCE_AV, cmd.dword1 << 2,
+ get_field(cmd.dword0, RISCV_IOMMU_CMD_IOFENCE_DATA));
+
+ if (res != MEMTX_OK) {
+ riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
+ RISCV_IOMMU_CQCSR_CQMF, 0);
+ goto fault;
+ }
+ break;
+
+ case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA,
+ RISCV_IOMMU_CMD_IOTINVAL_OPCODE):
+ {
+ bool gv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV);
+ bool av = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV);
+ bool pscv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV);
+ uint32_t gscid = get_field(cmd.dword0,
+ RISCV_IOMMU_CMD_IOTINVAL_GSCID);
+ uint32_t pscid = get_field(cmd.dword0,
+ RISCV_IOMMU_CMD_IOTINVAL_PSCID);
+ hwaddr iova = (cmd.dword1 << 2) & TARGET_PAGE_MASK;
+
+ if (pscv) {
+ /* illegal command arguments IOTINVAL.GVMA & PSCV == 1 */
+ goto cmd_ill;
+ }
+
+ func = riscv_iommu_iot_inval_all;
+
+ if (gv) {
+ func = (av) ? riscv_iommu_iot_inval_gscid_iova :
+ riscv_iommu_iot_inval_gscid;
+ }
+
+ riscv_iommu_iot_inval(
+ s, func, gscid, pscid, iova, RISCV_IOMMU_TRANS_TAG_VG);
+
+ riscv_iommu_iot_inval(
+ s, func, gscid, pscid, iova, RISCV_IOMMU_TRANS_TAG_VN);
+ break;
+ }
+
+ case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA,
+ RISCV_IOMMU_CMD_IOTINVAL_OPCODE):
+ {
+ bool gv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV);
+ bool av = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV);
+ bool pscv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV);
+ uint32_t gscid = get_field(cmd.dword0,
+ RISCV_IOMMU_CMD_IOTINVAL_GSCID);
+ uint32_t pscid = get_field(cmd.dword0,
+ RISCV_IOMMU_CMD_IOTINVAL_PSCID);
+ hwaddr iova = (cmd.dword1 << 2) & TARGET_PAGE_MASK;
+ RISCVIOMMUTransTag transtag;
+
+ if (gv) {
+ transtag = RISCV_IOMMU_TRANS_TAG_VN;
+ if (pscv) {
+ func = (av) ? riscv_iommu_iot_inval_gscid_pscid_iova :
+ riscv_iommu_iot_inval_gscid_pscid;
+ } else {
+ func = (av) ? riscv_iommu_iot_inval_gscid_iova :
+ riscv_iommu_iot_inval_gscid;
+ }
+ } else {
+ transtag = RISCV_IOMMU_TRANS_TAG_SS;
+ if (pscv) {
+ func = (av) ? riscv_iommu_iot_inval_pscid_iova :
+ riscv_iommu_iot_inval_pscid;
+ } else {
+ func = (av) ? riscv_iommu_iot_inval_iova :
+ riscv_iommu_iot_inval_all;
+ }
+ }
+
+ riscv_iommu_iot_inval(s, func, gscid, pscid, iova, transtag);
+ break;
+ }
+
+ case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT,
+ RISCV_IOMMU_CMD_IODIR_OPCODE):
+ if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) {
+ /* invalidate all device context cache mappings */
+ func = riscv_iommu_ctx_inval_all;
+ } else {
+ /* invalidate all device context matching DID */
+ func = riscv_iommu_ctx_inval_devid;
+ }
+ riscv_iommu_ctx_inval(s, func,
+ get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID), 0);
+ break;
+
+ case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT,
+ RISCV_IOMMU_CMD_IODIR_OPCODE):
+ if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) {
+ /* illegal command arguments IODIR_PDT & DV == 0 */
+ goto cmd_ill;
+ } else {
+ func = riscv_iommu_ctx_inval_devid_procid;
+ }
+ riscv_iommu_ctx_inval(s, func,
+ get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID),
+ get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_PID));
+ break;
+
+ /* ATS commands */
+ case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_INVAL,
+ RISCV_IOMMU_CMD_ATS_OPCODE):
+ if (!s->enable_ats) {
+ goto cmd_ill;
+ }
+
+ riscv_iommu_ats_inval(s, &cmd);
+ break;
+
+ case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_PRGR,
+ RISCV_IOMMU_CMD_ATS_OPCODE):
+ if (!s->enable_ats) {
+ goto cmd_ill;
+ }
+
+ riscv_iommu_ats_prgr(s, &cmd);
+ break;
+
+ default:
+ cmd_ill:
+ /* Invalid instruction, do not advance instruction index. */
+ riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
+ RISCV_IOMMU_CQCSR_CMD_ILL, 0);
+ goto fault;
+ }
+
+ /* Advance and update head pointer after command completes. */
+ head = (head + 1) & s->cq_mask;
+ riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_CQH, head);
+ }
+ return;
+
+fault:
+ if (ctrl & RISCV_IOMMU_CQCSR_CIE) {
+ riscv_iommu_notify(s, RISCV_IOMMU_INTR_CQ);
+ }
+}
+
+static void riscv_iommu_process_cq_control(RISCVIOMMUState *s)
+{
+ uint64_t base;
+ uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
+ uint32_t ctrl_clr;
+ bool enable = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQEN);
+ bool active = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQON);
+
+ if (enable && !active) {
+ base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_CQB);
+ s->cq_mask = (2ULL << get_field(base, RISCV_IOMMU_CQB_LOG2SZ)) - 1;
+ s->cq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_CQB_PPN));
+ stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~s->cq_mask);
+ stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQH], 0);
+ stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQT], 0);
+ ctrl_set = RISCV_IOMMU_CQCSR_CQON;
+ ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQMF |
+ RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CMD_TO |
+ RISCV_IOMMU_CQCSR_FENCE_W_IP;
+ } else if (!enable && active) {
+ stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~0);
+ ctrl_set = 0;
+ ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQON;
+ } else {
+ ctrl_set = 0;
+ ctrl_clr = RISCV_IOMMU_CQCSR_BUSY;
+ }
+
+ riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, ctrl_set, ctrl_clr);
+}
+
+static void riscv_iommu_process_fq_control(RISCVIOMMUState *s)
+{
+ uint64_t base;
+ uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
+ uint32_t ctrl_clr;
+ bool enable = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQEN);
+ bool active = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQON);
+
+ if (enable && !active) {
+ base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_FQB);
+ s->fq_mask = (2ULL << get_field(base, RISCV_IOMMU_FQB_LOG2SZ)) - 1;
+ s->fq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_FQB_PPN));
+ stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~s->fq_mask);
+ stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQH], 0);
+ stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQT], 0);
+ ctrl_set = RISCV_IOMMU_FQCSR_FQON;
+ ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQMF |
+ RISCV_IOMMU_FQCSR_FQOF;
+ } else if (!enable && active) {
+ stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~0);
+ ctrl_set = 0;
+ ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQON;
+ } else {
+ ctrl_set = 0;
+ ctrl_clr = RISCV_IOMMU_FQCSR_BUSY;
+ }
+
+ riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, ctrl_set, ctrl_clr);
+}
+
+static void riscv_iommu_process_pq_control(RISCVIOMMUState *s)
+{
+ uint64_t base;
+ uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
+ uint32_t ctrl_clr;
+ bool enable = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQEN);
+ bool active = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQON);
+
+ if (enable && !active) {
+ base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_PQB);
+ s->pq_mask = (2ULL << get_field(base, RISCV_IOMMU_PQB_LOG2SZ)) - 1;
+ s->pq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_PQB_PPN));
+ stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~s->pq_mask);
+ stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQH], 0);
+ stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQT], 0);
+ ctrl_set = RISCV_IOMMU_PQCSR_PQON;
+ ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQMF |
+ RISCV_IOMMU_PQCSR_PQOF;
+ } else if (!enable && active) {
+ stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~0);
+ ctrl_set = 0;
+ ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQON;
+ } else {
+ ctrl_set = 0;
+ ctrl_clr = RISCV_IOMMU_PQCSR_BUSY;
+ }
+
+ riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, ctrl_set, ctrl_clr);
+}
+
+static void riscv_iommu_process_dbg(RISCVIOMMUState *s)
+{
+ uint64_t iova = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_IOVA);
+ uint64_t ctrl = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_CTL);
+ unsigned devid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_DID);
+ unsigned pid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_PID);
+ RISCVIOMMUContext *ctx;
+ void *ref;
+
+ if (!(ctrl & RISCV_IOMMU_TR_REQ_CTL_GO_BUSY)) {
+ return;
+ }
+
+ ctx = riscv_iommu_ctx(s, devid, pid, &ref);
+ if (ctx == NULL) {
+ riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE,
+ RISCV_IOMMU_TR_RESPONSE_FAULT |
+ (RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED << 10));
+ } else {
+ IOMMUTLBEntry iotlb = {
+ .iova = iova,
+ .perm = ctrl & RISCV_IOMMU_TR_REQ_CTL_NW ? IOMMU_RO : IOMMU_RW,
+ .addr_mask = ~0,
+ .target_as = NULL,
+ };
+ int fault = riscv_iommu_translate(s, ctx, &iotlb, false);
+ if (fault) {
+ iova = RISCV_IOMMU_TR_RESPONSE_FAULT | (((uint64_t) fault) << 10);
+ } else {
+ iova = iotlb.translated_addr & ~iotlb.addr_mask;
+ iova >>= TARGET_PAGE_BITS;
+ iova &= RISCV_IOMMU_TR_RESPONSE_PPN;
+
+ /* We do not support superpages (> 4kbs) for now */
+ iova &= ~RISCV_IOMMU_TR_RESPONSE_S;
+ }
+ riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE, iova);
+ }
+
+ riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_TR_REQ_CTL, 0,
+ RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
+ riscv_iommu_ctx_put(s, ref);
+}
+
+typedef void riscv_iommu_process_fn(RISCVIOMMUState *s);
+
+static void riscv_iommu_update_icvec(RISCVIOMMUState *s, uint64_t data)
+{
+ uint64_t icvec = 0;
+
+ icvec |= MIN(data & RISCV_IOMMU_ICVEC_CIV,
+ s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_CIV);
+
+ icvec |= MIN(data & RISCV_IOMMU_ICVEC_FIV,
+ s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_FIV);
+
+ icvec |= MIN(data & RISCV_IOMMU_ICVEC_PMIV,
+ s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PMIV);
+
+ icvec |= MIN(data & RISCV_IOMMU_ICVEC_PIV,
+ s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PIV);
+
+ trace_riscv_iommu_icvec_write(data, icvec);
+
+ riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_ICVEC, icvec);
+}
+
+static void riscv_iommu_update_ipsr(RISCVIOMMUState *s, uint64_t data)
+{
+ uint32_t cqcsr, fqcsr, pqcsr;
+ uint32_t ipsr_set = 0;
+ uint32_t ipsr_clr = 0;
+
+ if (data & RISCV_IOMMU_IPSR_CIP) {
+ cqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
+
+ if (cqcsr & RISCV_IOMMU_CQCSR_CIE &&
+ (cqcsr & RISCV_IOMMU_CQCSR_FENCE_W_IP ||
+ cqcsr & RISCV_IOMMU_CQCSR_CMD_ILL ||
+ cqcsr & RISCV_IOMMU_CQCSR_CMD_TO ||
+ cqcsr & RISCV_IOMMU_CQCSR_CQMF)) {
+ ipsr_set |= RISCV_IOMMU_IPSR_CIP;
+ } else {
+ ipsr_clr |= RISCV_IOMMU_IPSR_CIP;
+ }
+ } else {
+ ipsr_clr |= RISCV_IOMMU_IPSR_CIP;
+ }
+
+ if (data & RISCV_IOMMU_IPSR_FIP) {
+ fqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
+
+ if (fqcsr & RISCV_IOMMU_FQCSR_FIE &&
+ (fqcsr & RISCV_IOMMU_FQCSR_FQOF ||
+ fqcsr & RISCV_IOMMU_FQCSR_FQMF)) {
+ ipsr_set |= RISCV_IOMMU_IPSR_FIP;
+ } else {
+ ipsr_clr |= RISCV_IOMMU_IPSR_FIP;
+ }
+ } else {
+ ipsr_clr |= RISCV_IOMMU_IPSR_FIP;
+ }
+
+ if (data & RISCV_IOMMU_IPSR_PIP) {
+ pqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
+
+ if (pqcsr & RISCV_IOMMU_PQCSR_PIE &&
+ (pqcsr & RISCV_IOMMU_PQCSR_PQOF ||
+ pqcsr & RISCV_IOMMU_PQCSR_PQMF)) {
+ ipsr_set |= RISCV_IOMMU_IPSR_PIP;
+ } else {
+ ipsr_clr |= RISCV_IOMMU_IPSR_PIP;
+ }
+ } else {
+ ipsr_clr |= RISCV_IOMMU_IPSR_PIP;
+ }
+
+ riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, ipsr_set, ipsr_clr);
+}
+
+static void riscv_iommu_process_hpm_writes(RISCVIOMMUState *s,
+ uint32_t regb,
+ bool prev_cy_inh)
+{
+ switch (regb) {
+ case RISCV_IOMMU_REG_IOCOUNTINH:
+ riscv_iommu_process_iocntinh_cy(s, prev_cy_inh);
+ break;
+
+ case RISCV_IOMMU_REG_IOHPMCYCLES:
+ case RISCV_IOMMU_REG_IOHPMCYCLES + 4:
+ riscv_iommu_process_hpmcycle_write(s);
+ break;
+
+ case RISCV_IOMMU_REG_IOHPMEVT_BASE ...
+ RISCV_IOMMU_REG_IOHPMEVT(RISCV_IOMMU_IOCOUNT_NUM) + 4:
+ riscv_iommu_process_hpmevt_write(s, regb & ~7);
+ break;
+ }
+}
+
+/*
+ * Write the resulting value of 'data' for the reg specified
+ * by 'reg_addr', after considering read-only/read-write/write-clear
+ * bits, in the pointer 'dest'.
+ *
+ * The result is written in little-endian.
+ */
+static void riscv_iommu_write_reg_val(RISCVIOMMUState *s,
+ void *dest, hwaddr reg_addr,
+ int size, uint64_t data)
+{
+ uint64_t ro = ldn_le_p(&s->regs_ro[reg_addr], size);
+ uint64_t wc = ldn_le_p(&s->regs_wc[reg_addr], size);
+ uint64_t rw = ldn_le_p(&s->regs_rw[reg_addr], size);
+
+ stn_le_p(dest, size, ((rw & ro) | (data & ~ro)) & ~(data & wc));
+}
+
+static MemTxResult riscv_iommu_mmio_write(void *opaque, hwaddr addr,
+ uint64_t data, unsigned size,
+ MemTxAttrs attrs)
+{
+ riscv_iommu_process_fn *process_fn = NULL;
+ RISCVIOMMUState *s = opaque;
+ uint32_t regb = addr & ~3;
+ uint32_t busy = 0;
+ uint64_t val = 0;
+ bool cy_inh = false;
+
+ if ((addr & (size - 1)) != 0) {
+ /* Unsupported MMIO alignment or access size */
+ return MEMTX_ERROR;
+ }
+
+ if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
+ /* Unsupported MMIO access location. */
+ return MEMTX_ACCESS_ERROR;
+ }
+
+ /* Track actionable MMIO write. */
+ switch (regb) {
+ case RISCV_IOMMU_REG_DDTP:
+ case RISCV_IOMMU_REG_DDTP + 4:
+ process_fn = riscv_iommu_process_ddtp;
+ regb = RISCV_IOMMU_REG_DDTP;
+ busy = RISCV_IOMMU_DDTP_BUSY;
+ break;
+
+ case RISCV_IOMMU_REG_CQT:
+ process_fn = riscv_iommu_process_cq_tail;
+ break;
+
+ case RISCV_IOMMU_REG_CQCSR:
+ process_fn = riscv_iommu_process_cq_control;
+ busy = RISCV_IOMMU_CQCSR_BUSY;
+ break;
+
+ case RISCV_IOMMU_REG_FQCSR:
+ process_fn = riscv_iommu_process_fq_control;
+ busy = RISCV_IOMMU_FQCSR_BUSY;
+ break;
+
+ case RISCV_IOMMU_REG_PQCSR:
+ process_fn = riscv_iommu_process_pq_control;
+ busy = RISCV_IOMMU_PQCSR_BUSY;
+ break;
+
+ case RISCV_IOMMU_REG_ICVEC:
+ case RISCV_IOMMU_REG_IPSR:
+ /*
+ * ICVEC and IPSR have special read/write procedures. We'll
+ * call their respective helpers and exit.
+ */
+ riscv_iommu_write_reg_val(s, &val, addr, size, data);
+
+ /*
+ * 'val' is stored as LE. Switch to host endianess
+ * before using it.
+ */
+ val = le64_to_cpu(val);
+
+ if (regb == RISCV_IOMMU_REG_ICVEC) {
+ riscv_iommu_update_icvec(s, val);
+ } else {
+ riscv_iommu_update_ipsr(s, val);
+ }
+
+ return MEMTX_OK;
+
+ case RISCV_IOMMU_REG_TR_REQ_CTL:
+ process_fn = riscv_iommu_process_dbg;
+ regb = RISCV_IOMMU_REG_TR_REQ_CTL;
+ busy = RISCV_IOMMU_TR_REQ_CTL_GO_BUSY;
+ break;
+
+ case RISCV_IOMMU_REG_IOCOUNTINH:
+ if (addr != RISCV_IOMMU_REG_IOCOUNTINH) {
+ break;
+ }
+ /* Store previous value of CY bit. */
+ cy_inh = !!(riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_IOCOUNTINH) &
+ RISCV_IOMMU_IOCOUNTINH_CY);
+ break;
+
+
+ default:
+ break;
+ }
+
+ /*
+ * Registers update might be not synchronized with core logic.
+ * If system software updates register when relevant BUSY bit
+ * is set IOMMU behavior of additional writes to the register
+ * is UNSPECIFIED.
+ */
+ riscv_iommu_write_reg_val(s, &s->regs_rw[addr], addr, size, data);
+
+ /* Busy flag update, MSB 4-byte register. */
+ if (busy) {
+ uint32_t rw = ldl_le_p(&s->regs_rw[regb]);
+ stl_le_p(&s->regs_rw[regb], rw | busy);
+ }
+
+ /* Process HPM writes and update any internal state if needed. */
+ if (regb >= RISCV_IOMMU_REG_IOCOUNTOVF &&
+ regb <= (RISCV_IOMMU_REG_IOHPMEVT(RISCV_IOMMU_IOCOUNT_NUM) + 4)) {
+ riscv_iommu_process_hpm_writes(s, regb, cy_inh);
+ }
+
+ if (process_fn) {
+ process_fn(s);
+ }
+
+ return MEMTX_OK;
+}
+
+static MemTxResult riscv_iommu_mmio_read(void *opaque, hwaddr addr,
+ uint64_t *data, unsigned size, MemTxAttrs attrs)
+{
+ RISCVIOMMUState *s = opaque;
+ uint64_t val = -1;
+ uint8_t *ptr;
+
+ if ((addr & (size - 1)) != 0) {
+ /* Unsupported MMIO alignment. */
+ return MEMTX_ERROR;
+ }
+
+ if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
+ return MEMTX_ACCESS_ERROR;
+ }
+
+ /* Compute cycle register value. */
+ if ((addr & ~7) == RISCV_IOMMU_REG_IOHPMCYCLES) {
+ val = riscv_iommu_hpmcycle_read(s);
+ ptr = (uint8_t *)&val + (addr & 7);
+ } else if ((addr & ~3) == RISCV_IOMMU_REG_IOCOUNTOVF) {
+ /*
+ * Software can read RISCV_IOMMU_REG_IOCOUNTOVF before timer
+ * callback completes. In which case CY_OF bit in
+ * RISCV_IOMMU_IOHPMCYCLES_OVF would be 0. Here we take the
+ * CY_OF bit state from RISCV_IOMMU_REG_IOHPMCYCLES register as
+ * it's not dependent over the timer callback and is computed
+ * from cycle overflow.
+ */
+ val = ldq_le_p(&s->regs_rw[addr]);
+ val |= (riscv_iommu_hpmcycle_read(s) & RISCV_IOMMU_IOHPMCYCLES_OVF)
+ ? RISCV_IOMMU_IOCOUNTOVF_CY
+ : 0;
+ ptr = (uint8_t *)&val + (addr & 3);
+ } else {
+ ptr = &s->regs_rw[addr];
+ }
+
+ val = ldn_le_p(ptr, size);
+
+ *data = val;
+
+ return MEMTX_OK;
+}
+
+static const MemoryRegionOps riscv_iommu_mmio_ops = {
+ .read_with_attrs = riscv_iommu_mmio_read,
+ .write_with_attrs = riscv_iommu_mmio_write,
+ .endianness = DEVICE_NATIVE_ENDIAN,
+ .impl = {
+ .min_access_size = 4,
+ .max_access_size = 8,
+ .unaligned = false,
+ },
+ .valid = {
+ .min_access_size = 4,
+ .max_access_size = 8,
+ }
+};
+
+/*
+ * Translations matching MSI pattern check are redirected to "riscv-iommu-trap"
+ * memory region as untranslated address, for additional MSI/MRIF interception
+ * by IOMMU interrupt remapping implementation.
+ * Note: Device emulation code generating an MSI is expected to provide a valid
+ * memory transaction attributes with requested_id set.
+ */
+static MemTxResult riscv_iommu_trap_write(void *opaque, hwaddr addr,
+ uint64_t data, unsigned size, MemTxAttrs attrs)
+{
+ RISCVIOMMUState* s = (RISCVIOMMUState *)opaque;
+ RISCVIOMMUContext *ctx;
+ MemTxResult res;
+ void *ref;
+ uint32_t devid = attrs.requester_id;
+
+ if (attrs.unspecified) {
+ return MEMTX_ACCESS_ERROR;
+ }
+
+ /* FIXME: PCIe bus remapping for attached endpoints. */
+ devid |= s->bus << 8;
+
+ ctx = riscv_iommu_ctx(s, devid, 0, &ref);
+ if (ctx == NULL) {
+ res = MEMTX_ACCESS_ERROR;
+ } else {
+ res = riscv_iommu_msi_write(s, ctx, addr, data, size, attrs);
+ }
+ riscv_iommu_ctx_put(s, ref);
+ return res;
+}
+
+static MemTxResult riscv_iommu_trap_read(void *opaque, hwaddr addr,
+ uint64_t *data, unsigned size, MemTxAttrs attrs)
+{
+ return MEMTX_ACCESS_ERROR;
+}
+
+static const MemoryRegionOps riscv_iommu_trap_ops = {
+ .read_with_attrs = riscv_iommu_trap_read,
+ .write_with_attrs = riscv_iommu_trap_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .impl = {
+ .min_access_size = 4,
+ .max_access_size = 8,
+ .unaligned = true,
+ },
+ .valid = {
+ .min_access_size = 4,
+ .max_access_size = 8,
+ }
+};
+
+void riscv_iommu_set_cap_igs(RISCVIOMMUState *s, riscv_iommu_igs_mode mode)
+{
+ s->cap = set_field(s->cap, RISCV_IOMMU_CAP_IGS, mode);
+}
+
+static void riscv_iommu_instance_init(Object *obj)
+{
+ RISCVIOMMUState *s = RISCV_IOMMU(obj);
+
+ /* Enable translation debug interface */
+ s->cap = RISCV_IOMMU_CAP_DBG;
+
+ /* Report QEMU target physical address space limits */
+ s->cap = set_field(s->cap, RISCV_IOMMU_CAP_PAS,
+ TARGET_PHYS_ADDR_SPACE_BITS);
+
+ /* TODO: method to report supported PID bits */
+ s->pid_bits = 8; /* restricted to size of MemTxAttrs.pid */
+ s->cap |= RISCV_IOMMU_CAP_PD8;
+
+ /* register storage */
+ s->regs_rw = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
+ s->regs_ro = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
+ s->regs_wc = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
+
+ /* Mark all registers read-only */
+ memset(s->regs_ro, 0xff, RISCV_IOMMU_REG_SIZE);
+
+ /* Device translation context cache */
+ s->ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash,
+ riscv_iommu_ctx_equal,
+ g_free, NULL);
+
+ s->iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash,
+ riscv_iommu_iot_equal,
+ g_free, NULL);
+
+ s->iommus.le_next = NULL;
+ s->iommus.le_prev = NULL;
+ QLIST_INIT(&s->spaces);
+}
+
+static void riscv_iommu_realize(DeviceState *dev, Error **errp)
+{
+ RISCVIOMMUState *s = RISCV_IOMMU(dev);
+
+ s->cap |= s->version & RISCV_IOMMU_CAP_VERSION;
+ if (s->enable_msi) {
+ s->cap |= RISCV_IOMMU_CAP_MSI_FLAT | RISCV_IOMMU_CAP_MSI_MRIF;
+ }
+ if (s->enable_ats) {
+ s->cap |= RISCV_IOMMU_CAP_ATS;
+ }
+ if (s->enable_s_stage) {
+ s->cap |= RISCV_IOMMU_CAP_SV32 | RISCV_IOMMU_CAP_SV39 |
+ RISCV_IOMMU_CAP_SV48 | RISCV_IOMMU_CAP_SV57;
+ }
+ if (s->enable_g_stage) {
+ s->cap |= RISCV_IOMMU_CAP_SV32X4 | RISCV_IOMMU_CAP_SV39X4 |
+ RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4;
+ }
+
+ if (s->hpm_cntrs > 0) {
+ /* Clip number of HPM counters to maximum supported (31). */
+ if (s->hpm_cntrs > RISCV_IOMMU_IOCOUNT_NUM) {
+ s->hpm_cntrs = RISCV_IOMMU_IOCOUNT_NUM;
+ }
+ /* Enable hardware performance monitor interface */
+ s->cap |= RISCV_IOMMU_CAP_HPM;
+ }
+
+ /* Out-of-reset translation mode: OFF (DMA disabled) BARE (passthrough) */
+ s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, s->enable_off ?
+ RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE);
+
+ /*
+ * Register complete MMIO space, including MSI/PBA registers.
+ * Note, PCIDevice implementation will add overlapping MR for MSI/PBA,
+ * managed directly by the PCIDevice implementation.
+ */
+ memory_region_init_io(&s->regs_mr, OBJECT(dev), &riscv_iommu_mmio_ops, s,
+ "riscv-iommu-regs", RISCV_IOMMU_REG_SIZE);
+
+ /* Set power-on register state */
+ stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_CAP], s->cap);
+ stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_FCTL], 0);
+ stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FCTL],
+ ~(RISCV_IOMMU_FCTL_BE | RISCV_IOMMU_FCTL_WSI));
+ stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_DDTP],
+ ~(RISCV_IOMMU_DDTP_PPN | RISCV_IOMMU_DDTP_MODE));
+ stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQB],
+ ~(RISCV_IOMMU_CQB_LOG2SZ | RISCV_IOMMU_CQB_PPN));
+ stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQB],
+ ~(RISCV_IOMMU_FQB_LOG2SZ | RISCV_IOMMU_FQB_PPN));
+ stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQB],
+ ~(RISCV_IOMMU_PQB_LOG2SZ | RISCV_IOMMU_PQB_PPN));
+ stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQMF |
+ RISCV_IOMMU_CQCSR_CMD_TO | RISCV_IOMMU_CQCSR_CMD_ILL);
+ stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQON |
+ RISCV_IOMMU_CQCSR_BUSY);
+ stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQMF |
+ RISCV_IOMMU_FQCSR_FQOF);
+ stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQON |
+ RISCV_IOMMU_FQCSR_BUSY);
+ stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQMF |
+ RISCV_IOMMU_PQCSR_PQOF);
+ stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQON |
+ RISCV_IOMMU_PQCSR_BUSY);
+ stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_IPSR], ~0);
+ stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_ICVEC], 0);
+ stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_DDTP], s->ddtp);
+ /* If debug registers enabled. */
+ if (s->cap & RISCV_IOMMU_CAP_DBG) {
+ stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_IOVA], 0);
+ stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_CTL],
+ RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
+ }
+
+ /* If HPM registers are enabled. */
+ if (s->cap & RISCV_IOMMU_CAP_HPM) {
+ /* +1 for cycle counter bit. */
+ stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_IOCOUNTINH],
+ ~((2 << s->hpm_cntrs) - 1));
+ stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_IOHPMCYCLES], 0);
+ memset(&s->regs_ro[RISCV_IOMMU_REG_IOHPMCTR_BASE],
+ 0x00, s->hpm_cntrs * 8);
+ memset(&s->regs_ro[RISCV_IOMMU_REG_IOHPMEVT_BASE],
+ 0x00, s->hpm_cntrs * 8);
+ }
+
+ /* Memory region for downstream access, if specified. */
+ if (s->target_mr) {
+ s->target_as = g_new0(AddressSpace, 1);
+ address_space_init(s->target_as, s->target_mr,
+ "riscv-iommu-downstream");
+ } else {
+ /* Fallback to global system memory. */
+ s->target_as = &address_space_memory;
+ }
+
+ /* Memory region for untranslated MRIF/MSI writes */
+ memory_region_init_io(&s->trap_mr, OBJECT(dev), &riscv_iommu_trap_ops, s,
+ "riscv-iommu-trap", ~0ULL);
+ address_space_init(&s->trap_as, &s->trap_mr, "riscv-iommu-trap-as");
+
+ if (s->cap & RISCV_IOMMU_CAP_HPM) {
+ s->hpm_timer =
+ timer_new_ns(QEMU_CLOCK_VIRTUAL, riscv_iommu_hpm_timer_cb, s);
+ s->hpm_event_ctr_map = g_hash_table_new(g_direct_hash, g_direct_equal);
+ }
+}
+
+static void riscv_iommu_unrealize(DeviceState *dev)
+{
+ RISCVIOMMUState *s = RISCV_IOMMU(dev);
+
+ g_hash_table_unref(s->iot_cache);
+ g_hash_table_unref(s->ctx_cache);
+
+ if (s->cap & RISCV_IOMMU_CAP_HPM) {
+ g_hash_table_unref(s->hpm_event_ctr_map);
+ timer_free(s->hpm_timer);
+ }
+}
+
+void riscv_iommu_reset(RISCVIOMMUState *s)
+{
+ uint32_t reg_clr;
+ int ddtp_mode;
+
+ /*
+ * Clear DDTP while setting DDTP_mode back to user
+ * initial setting.
+ */
+ ddtp_mode = s->enable_off ?
+ RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE;
+ s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, ddtp_mode);
+ riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, s->ddtp);
+
+ reg_clr = RISCV_IOMMU_CQCSR_CQEN | RISCV_IOMMU_CQCSR_CIE |
+ RISCV_IOMMU_CQCSR_CQON | RISCV_IOMMU_CQCSR_BUSY;
+ riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, 0, reg_clr);
+
+ reg_clr = RISCV_IOMMU_FQCSR_FQEN | RISCV_IOMMU_FQCSR_FIE |
+ RISCV_IOMMU_FQCSR_FQON | RISCV_IOMMU_FQCSR_BUSY;
+ riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, 0, reg_clr);
+
+ reg_clr = RISCV_IOMMU_PQCSR_PQEN | RISCV_IOMMU_PQCSR_PIE |
+ RISCV_IOMMU_PQCSR_PQON | RISCV_IOMMU_PQCSR_BUSY;
+ riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, 0, reg_clr);
+
+ riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_TR_REQ_CTL, 0,
+ RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
+
+ riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_IPSR, 0);
+
+ g_hash_table_remove_all(s->ctx_cache);
+ g_hash_table_remove_all(s->iot_cache);
+}
+
+static const Property riscv_iommu_properties[] = {
+ DEFINE_PROP_UINT32("version", RISCVIOMMUState, version,
+ RISCV_IOMMU_SPEC_DOT_VER),
+ DEFINE_PROP_UINT32("bus", RISCVIOMMUState, bus, 0x0),
+ DEFINE_PROP_UINT32("ioatc-limit", RISCVIOMMUState, iot_limit,
+ LIMIT_CACHE_IOT),
+ DEFINE_PROP_BOOL("intremap", RISCVIOMMUState, enable_msi, TRUE),
+ DEFINE_PROP_BOOL("ats", RISCVIOMMUState, enable_ats, TRUE),
+ DEFINE_PROP_BOOL("off", RISCVIOMMUState, enable_off, TRUE),
+ DEFINE_PROP_BOOL("s-stage", RISCVIOMMUState, enable_s_stage, TRUE),
+ DEFINE_PROP_BOOL("g-stage", RISCVIOMMUState, enable_g_stage, TRUE),
+ DEFINE_PROP_LINK("downstream-mr", RISCVIOMMUState, target_mr,
+ TYPE_MEMORY_REGION, MemoryRegion *),
+ DEFINE_PROP_UINT8("hpm-counters", RISCVIOMMUState, hpm_cntrs,
+ RISCV_IOMMU_IOCOUNT_NUM),
+};
+
+static void riscv_iommu_class_init(ObjectClass *klass, const void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ /* internal device for riscv-iommu-{pci/sys}, not user-creatable */
+ dc->user_creatable = false;
+ dc->realize = riscv_iommu_realize;
+ dc->unrealize = riscv_iommu_unrealize;
+ device_class_set_props(dc, riscv_iommu_properties);
+}
+
+static const TypeInfo riscv_iommu_info = {
+ .name = TYPE_RISCV_IOMMU,
+ .parent = TYPE_DEVICE,
+ .instance_size = sizeof(RISCVIOMMUState),
+ .instance_init = riscv_iommu_instance_init,
+ .class_init = riscv_iommu_class_init,
+};
+
+static const char *IOMMU_FLAG_STR[] = {
+ "NA",
+ "RO",
+ "WR",
+ "RW",
+};
+
+/* RISC-V IOMMU Memory Region - Address Translation Space */
+static IOMMUTLBEntry riscv_iommu_memory_region_translate(
+ IOMMUMemoryRegion *iommu_mr, hwaddr addr,
+ IOMMUAccessFlags flag, int iommu_idx)
+{
+ RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
+ RISCVIOMMUContext *ctx;
+ void *ref;
+ IOMMUTLBEntry iotlb = {
+ .iova = addr,
+ .target_as = as->iommu->target_as,
+ .addr_mask = ~0ULL,
+ .perm = flag,
+ };
+
+ ctx = riscv_iommu_ctx(as->iommu, as->devid, iommu_idx, &ref);
+ if (ctx == NULL) {
+ /* Translation disabled or invalid. */
+ iotlb.addr_mask = 0;
+ iotlb.perm = IOMMU_NONE;
+ } else if (riscv_iommu_translate(as->iommu, ctx, &iotlb, true)) {
+ /* Translation disabled or fault reported. */
+ iotlb.addr_mask = 0;
+ iotlb.perm = IOMMU_NONE;
+ }
+
+ /* Trace all dma translations with original access flags. */
+ trace_riscv_iommu_dma(as->iommu->parent_obj.id, PCI_BUS_NUM(as->devid),
+ PCI_SLOT(as->devid), PCI_FUNC(as->devid), iommu_idx,
+ IOMMU_FLAG_STR[flag & IOMMU_RW], iotlb.iova,
+ iotlb.translated_addr);
+
+ riscv_iommu_ctx_put(as->iommu, ref);
+
+ return iotlb;
+}
+
+static int riscv_iommu_memory_region_notify(
+ IOMMUMemoryRegion *iommu_mr, IOMMUNotifierFlag old,
+ IOMMUNotifierFlag new, Error **errp)
+{
+ RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
+
+ if (old == IOMMU_NOTIFIER_NONE) {
+ as->notifier = true;
+ trace_riscv_iommu_notifier_add(iommu_mr->parent_obj.name);
+ } else if (new == IOMMU_NOTIFIER_NONE) {
+ as->notifier = false;
+ trace_riscv_iommu_notifier_del(iommu_mr->parent_obj.name);
+ }
+
+ return 0;
+}
+
+static inline bool pci_is_iommu(PCIDevice *pdev)
+{
+ return pci_get_word(pdev->config + PCI_CLASS_DEVICE) == 0x0806;
+}
+
+static AddressSpace *riscv_iommu_find_as(PCIBus *bus, void *opaque, int devfn)
+{
+ RISCVIOMMUState *s = (RISCVIOMMUState *) opaque;
+ PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus), devfn);
+ AddressSpace *as = NULL;
+
+ if (pdev && pci_is_iommu(pdev)) {
+ return s->target_as;
+ }
+
+ /* Find first registered IOMMU device */
+ while (s->iommus.le_prev) {
+ s = *(s->iommus.le_prev);
+ }
+
+ /* Find first matching IOMMU */
+ while (s != NULL && as == NULL) {
+ as = riscv_iommu_space(s, PCI_BUILD_BDF(pci_bus_num(bus), devfn));
+ s = s->iommus.le_next;
+ }
+
+ return as ? as : &address_space_memory;
+}
+
+static const PCIIOMMUOps riscv_iommu_ops = {
+ .get_address_space = riscv_iommu_find_as,
+};
+
+void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus,
+ Error **errp)
+{
+ if (bus->iommu_ops &&
+ bus->iommu_ops->get_address_space == riscv_iommu_find_as) {
+ /* Allow multiple IOMMUs on the same PCIe bus, link known devices */
+ RISCVIOMMUState *last = (RISCVIOMMUState *)bus->iommu_opaque;
+ QLIST_INSERT_AFTER(last, iommu, iommus);
+ } else if (!bus->iommu_ops && !bus->iommu_opaque) {
+ pci_setup_iommu(bus, &riscv_iommu_ops, iommu);
+ } else {
+ error_setg(errp, "can't register secondary IOMMU for PCI bus #%d",
+ pci_bus_num(bus));
+ }
+}
+
+static int riscv_iommu_memory_region_index(IOMMUMemoryRegion *iommu_mr,
+ MemTxAttrs attrs)
+{
+ return attrs.unspecified ? RISCV_IOMMU_NOPROCID : (int)attrs.pid;
+}
+
+static int riscv_iommu_memory_region_index_len(IOMMUMemoryRegion *iommu_mr)
+{
+ RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
+ return 1 << as->iommu->pid_bits;
+}
+
+static void riscv_iommu_memory_region_init(ObjectClass *klass, const void *data)
+{
+ IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
+
+ imrc->translate = riscv_iommu_memory_region_translate;
+ imrc->notify_flag_changed = riscv_iommu_memory_region_notify;
+ imrc->attrs_to_index = riscv_iommu_memory_region_index;
+ imrc->num_indexes = riscv_iommu_memory_region_index_len;
+}
+
+static const TypeInfo riscv_iommu_memory_region_info = {
+ .parent = TYPE_IOMMU_MEMORY_REGION,
+ .name = TYPE_RISCV_IOMMU_MEMORY_REGION,
+ .class_init = riscv_iommu_memory_region_init,
+};
+
+static void riscv_iommu_register_mr_types(void)
+{
+ type_register_static(&riscv_iommu_memory_region_info);
+ type_register_static(&riscv_iommu_info);
+}
+
+type_init(riscv_iommu_register_mr_types);
diff --git a/hw/riscv/riscv-iommu.h b/hw/riscv/riscv-iommu.h
new file mode 100644
index 0000000..a31aa62
--- /dev/null
+++ b/hw/riscv/riscv-iommu.h
@@ -0,0 +1,157 @@
+/*
+ * QEMU emulation of an RISC-V IOMMU
+ *
+ * Copyright (C) 2022-2023 Rivos Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_RISCV_IOMMU_STATE_H
+#define HW_RISCV_IOMMU_STATE_H
+
+#include "qom/object.h"
+#include "hw/qdev-properties.h"
+#include "system/dma.h"
+#include "hw/riscv/iommu.h"
+#include "hw/riscv/riscv-iommu-bits.h"
+
+typedef enum riscv_iommu_igs_modes riscv_iommu_igs_mode;
+
+struct RISCVIOMMUState {
+ /*< private >*/
+ DeviceState parent_obj;
+
+ /*< public >*/
+ uint32_t version; /* Reported interface version number */
+ uint32_t pid_bits; /* process identifier width */
+ uint32_t bus; /* PCI bus mapping for non-root endpoints */
+
+ uint64_t cap; /* IOMMU supported capabilities */
+ uint64_t fctl; /* IOMMU enabled features */
+ uint64_t icvec_avail_vectors; /* Available interrupt vectors in ICVEC */
+
+ bool enable_off; /* Enable out-of-reset OFF mode (DMA disabled) */
+ bool enable_msi; /* Enable MSI remapping */
+ bool enable_ats; /* Enable ATS support */
+ bool enable_s_stage; /* Enable S/VS-Stage translation */
+ bool enable_g_stage; /* Enable G-Stage translation */
+
+ /* IOMMU Internal State */
+ uint64_t ddtp; /* Validated Device Directory Tree Root Pointer */
+
+ dma_addr_t cq_addr; /* Command queue base physical address */
+ dma_addr_t fq_addr; /* Fault/event queue base physical address */
+ dma_addr_t pq_addr; /* Page request queue base physical address */
+
+ uint32_t cq_mask; /* Command queue index bit mask */
+ uint32_t fq_mask; /* Fault/event queue index bit mask */
+ uint32_t pq_mask; /* Page request queue index bit mask */
+
+ /* interrupt notifier */
+ void (*notify)(RISCVIOMMUState *iommu, unsigned vector);
+
+ /* IOMMU target address space */
+ AddressSpace *target_as;
+ MemoryRegion *target_mr;
+
+ /* MSI / MRIF access trap */
+ AddressSpace trap_as;
+ MemoryRegion trap_mr;
+
+ GHashTable *ctx_cache; /* Device translation Context Cache */
+
+ GHashTable *iot_cache; /* IO Translated Address Cache */
+ unsigned iot_limit; /* IO Translation Cache size limit */
+
+ /* MMIO Hardware Interface */
+ MemoryRegion regs_mr;
+ uint8_t *regs_rw; /* register state (user write) */
+ uint8_t *regs_wc; /* write-1-to-clear mask */
+ uint8_t *regs_ro; /* read-only mask */
+
+ QLIST_ENTRY(RISCVIOMMUState) iommus;
+ QLIST_HEAD(, RISCVIOMMUSpace) spaces;
+
+ /* HPM cycle counter */
+ QEMUTimer *hpm_timer;
+ uint64_t hpmcycle_val; /* Current value of cycle register */
+ uint64_t hpmcycle_prev; /* Saved value of QEMU_CLOCK_VIRTUAL clock */
+ uint64_t irq_overflow_left; /* Value beyond INT64_MAX after overflow */
+
+ /* HPM event counters */
+ GHashTable *hpm_event_ctr_map; /* Mapping of events to counters */
+ uint8_t hpm_cntrs;
+};
+
+void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus,
+ Error **errp);
+void riscv_iommu_set_cap_igs(RISCVIOMMUState *s, riscv_iommu_igs_mode mode);
+void riscv_iommu_reset(RISCVIOMMUState *s);
+void riscv_iommu_notify(RISCVIOMMUState *s, int vec_type);
+
+typedef struct RISCVIOMMUContext RISCVIOMMUContext;
+/* Device translation context state. */
+struct RISCVIOMMUContext {
+ uint64_t devid:24; /* Requester Id, AKA device_id */
+ uint64_t process_id:20; /* Process ID. PASID for PCIe */
+ uint64_t tc; /* Translation Control */
+ uint64_t ta; /* Translation Attributes */
+ uint64_t satp; /* S-Stage address translation and protection */
+ uint64_t gatp; /* G-Stage address translation and protection */
+ uint64_t msi_addr_mask; /* MSI filtering - address mask */
+ uint64_t msi_addr_pattern; /* MSI filtering - address pattern */
+ uint64_t msiptp; /* MSI redirection page table pointer */
+};
+
+/* private helpers */
+
+/* Register helper functions */
+static inline uint32_t riscv_iommu_reg_mod32(RISCVIOMMUState *s,
+ unsigned idx, uint32_t set, uint32_t clr)
+{
+ uint32_t val = ldl_le_p(s->regs_rw + idx);
+ stl_le_p(s->regs_rw + idx, (val & ~clr) | set);
+ return val;
+}
+
+static inline void riscv_iommu_reg_set32(RISCVIOMMUState *s, unsigned idx,
+ uint32_t set)
+{
+ stl_le_p(s->regs_rw + idx, set);
+}
+
+static inline uint32_t riscv_iommu_reg_get32(RISCVIOMMUState *s, unsigned idx)
+{
+ return ldl_le_p(s->regs_rw + idx);
+}
+
+static inline uint64_t riscv_iommu_reg_mod64(RISCVIOMMUState *s, unsigned idx,
+ uint64_t set, uint64_t clr)
+{
+ uint64_t val = ldq_le_p(s->regs_rw + idx);
+ stq_le_p(s->regs_rw + idx, (val & ~clr) | set);
+ return val;
+}
+
+static inline void riscv_iommu_reg_set64(RISCVIOMMUState *s, unsigned idx,
+ uint64_t set)
+{
+ stq_le_p(s->regs_rw + idx, set);
+}
+
+static inline uint64_t riscv_iommu_reg_get64(RISCVIOMMUState *s,
+ unsigned idx)
+{
+ return ldq_le_p(s->regs_rw + idx);
+}
+#endif
diff --git a/hw/riscv/riscv_hart.c b/hw/riscv/riscv_hart.c
index 613ea2a..7f26760 100644
--- a/hw/riscv/riscv_hart.c
+++ b/hw/riscv/riscv_hart.c
@@ -21,19 +21,38 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu/module.h"
-#include "sysemu/reset.h"
+#include "system/reset.h"
+#include "system/qtest.h"
+#include "qemu/cutils.h"
#include "hw/sysbus.h"
#include "target/riscv/cpu.h"
#include "hw/qdev-properties.h"
#include "hw/riscv/riscv_hart.h"
+#include "qemu/error-report.h"
-static Property riscv_harts_props[] = {
+static const Property riscv_harts_props[] = {
DEFINE_PROP_UINT32("num-harts", RISCVHartArrayState, num_harts, 1),
DEFINE_PROP_UINT32("hartid-base", RISCVHartArrayState, hartid_base, 0),
DEFINE_PROP_STRING("cpu-type", RISCVHartArrayState, cpu_type),
DEFINE_PROP_UINT64("resetvec", RISCVHartArrayState, resetvec,
DEFAULT_RSTVEC),
- DEFINE_PROP_END_OF_LIST(),
+
+ /*
+ * Smrnmi implementation-defined interrupt and exception trap handlers.
+ *
+ * When an RNMI interrupt is detected, the hart then enters M-mode and
+ * jumps to the address defined by "rnmi-interrupt-vector".
+ *
+ * When the hart encounters an exception while executing in M-mode with
+ * the mnstatus.NMIE bit clear, the hart then jumps to the address
+ * defined by "rnmi-exception-vector".
+ */
+ DEFINE_PROP_ARRAY("rnmi-interrupt-vector", RISCVHartArrayState,
+ num_rnmi_irqvec, rnmi_irqvec, qdev_prop_uint64,
+ uint64_t),
+ DEFINE_PROP_ARRAY("rnmi-exception-vector", RISCVHartArrayState,
+ num_rnmi_excpvec, rnmi_excpvec, qdev_prop_uint64,
+ uint64_t),
};
static void riscv_harts_cpu_reset(void *opaque)
@@ -42,11 +61,85 @@ static void riscv_harts_cpu_reset(void *opaque)
cpu_reset(CPU(cpu));
}
+#ifndef CONFIG_USER_ONLY
+static void csr_call(char *cmd, uint64_t cpu_num, int csrno, uint64_t *val)
+{
+ RISCVCPU *cpu = RISCV_CPU(cpu_by_arch_id(cpu_num));
+ CPURISCVState *env = &cpu->env;
+
+ int ret = RISCV_EXCP_NONE;
+ if (strcmp(cmd, "get_csr") == 0) {
+ ret = riscv_csrr(env, csrno, (target_ulong *)val);
+ } else if (strcmp(cmd, "set_csr") == 0) {
+ ret = riscv_csrrw(env, csrno, NULL, *(target_ulong *)val,
+ MAKE_64BIT_MASK(0, TARGET_LONG_BITS), 0);
+ }
+
+ g_assert(ret == RISCV_EXCP_NONE);
+}
+
+static bool csr_qtest_callback(CharBackend *chr, gchar **words)
+{
+ if (strcmp(words[0], "csr") == 0) {
+
+ uint64_t cpu;
+ uint64_t val;
+ int rc, csr;
+
+ rc = qemu_strtou64(words[2], NULL, 0, &cpu);
+ g_assert(rc == 0);
+ rc = qemu_strtoi(words[3], NULL, 0, &csr);
+ g_assert(rc == 0);
+ rc = qemu_strtou64(words[4], NULL, 0, &val);
+ g_assert(rc == 0);
+ csr_call(words[1], cpu, csr, &val);
+
+ qtest_sendf(chr, "OK 0 "TARGET_FMT_lx"\n", (target_ulong)val);
+
+ return true;
+ }
+
+ return false;
+}
+
+static void riscv_cpu_register_csr_qtest_callback(void)
+{
+ static bool first = true;
+ if (first) {
+ first = false;
+ qtest_set_command_cb(csr_qtest_callback);
+ }
+}
+#endif
+
static bool riscv_hart_realize(RISCVHartArrayState *s, int idx,
char *cpu_type, Error **errp)
{
object_initialize_child(OBJECT(s), "harts[*]", &s->harts[idx], cpu_type);
qdev_prop_set_uint64(DEVICE(&s->harts[idx]), "resetvec", s->resetvec);
+
+ if (s->harts[idx].cfg.ext_smrnmi) {
+ if (idx < s->num_rnmi_irqvec) {
+ qdev_prop_set_uint64(DEVICE(&s->harts[idx]),
+ "rnmi-interrupt-vector", s->rnmi_irqvec[idx]);
+ }
+
+ if (idx < s->num_rnmi_excpvec) {
+ qdev_prop_set_uint64(DEVICE(&s->harts[idx]),
+ "rnmi-exception-vector", s->rnmi_excpvec[idx]);
+ }
+ } else {
+ if (s->num_rnmi_irqvec > 0) {
+ warn_report_once("rnmi-interrupt-vector property is ignored "
+ "because Smrnmi extension is not enabled.");
+ }
+
+ if (s->num_rnmi_excpvec > 0) {
+ warn_report_once("rnmi-exception-vector property is ignored "
+ "because Smrnmi extension is not enabled.");
+ }
+ }
+
s->harts[idx].env.mhartid = s->hartid_base + idx;
qemu_register_reset(riscv_harts_cpu_reset, &s->harts[idx]);
return qdev_realize(DEVICE(&s->harts[idx]), NULL, errp);
@@ -59,6 +152,10 @@ static void riscv_harts_realize(DeviceState *dev, Error **errp)
s->harts = g_new0(RISCVCPU, s->num_harts);
+#ifndef CONFIG_USER_ONLY
+ riscv_cpu_register_csr_qtest_callback();
+#endif
+
for (n = 0; n < s->num_harts; n++) {
if (!riscv_hart_realize(s, n, s->cpu_type, errp)) {
return;
@@ -66,7 +163,7 @@ static void riscv_harts_realize(DeviceState *dev, Error **errp)
}
}
-static void riscv_harts_class_init(ObjectClass *klass, void *data)
+static void riscv_harts_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
diff --git a/hw/riscv/shakti_c.c b/hw/riscv/shakti_c.c
index 3888034..3e7f441 100644
--- a/hw/riscv/shakti_c.c
+++ b/hw/riscv/shakti_c.c
@@ -23,9 +23,9 @@
#include "qemu/error-report.h"
#include "hw/intc/sifive_plic.h"
#include "hw/intc/riscv_aclint.h"
-#include "sysemu/sysemu.h"
+#include "system/system.h"
#include "hw/qdev-properties.h"
-#include "exec/address-spaces.h"
+#include "system/address-spaces.h"
#include "hw/riscv/boot.h"
static const struct MemmapEntry {
@@ -45,6 +45,7 @@ static void shakti_c_machine_state_init(MachineState *mstate)
{
ShaktiCMachineState *sms = RISCV_SHAKTI_MACHINE(mstate);
MemoryRegion *system_memory = get_system_memory();
+ hwaddr firmware_load_addr = shakti_c_memmap[SHAKTI_C_RAM].base;
/* Initialize SoC */
object_initialize_child(OBJECT(mstate), "soc", &sms->soc,
@@ -56,23 +57,21 @@ static void shakti_c_machine_state_init(MachineState *mstate)
shakti_c_memmap[SHAKTI_C_RAM].base,
mstate->ram);
+ if (mstate->firmware) {
+ riscv_load_firmware(mstate->firmware, &firmware_load_addr, NULL);
+ }
+
/* ROM reset vector */
- riscv_setup_rom_reset_vec(mstate, &sms->soc.cpus,
- shakti_c_memmap[SHAKTI_C_RAM].base,
+ riscv_setup_rom_reset_vec(mstate, &sms->soc.cpus, firmware_load_addr,
shakti_c_memmap[SHAKTI_C_ROM].base,
shakti_c_memmap[SHAKTI_C_ROM].size, 0, 0);
- if (mstate->firmware) {
- riscv_load_firmware(mstate->firmware,
- shakti_c_memmap[SHAKTI_C_RAM].base,
- NULL);
- }
}
static void shakti_c_machine_instance_init(Object *obj)
{
}
-static void shakti_c_machine_class_init(ObjectClass *klass, void *data)
+static void shakti_c_machine_class_init(ObjectClass *klass, const void *data)
{
MachineClass *mc = MACHINE_CLASS(klass);
static const char * const valid_cpu_types[] = {
@@ -143,7 +142,7 @@ static void shakti_c_soc_state_realize(DeviceState *dev, Error **errp)
shakti_c_memmap[SHAKTI_C_ROM].base, &sss->rom);
}
-static void shakti_c_soc_class_init(ObjectClass *klass, void *data)
+static void shakti_c_soc_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
dc->realize = shakti_c_soc_state_realize;
diff --git a/hw/riscv/sifive_e.c b/hw/riscv/sifive_e.c
index 87d9602..7baed19 100644
--- a/hw/riscv/sifive_e.c
+++ b/hw/riscv/sifive_e.c
@@ -35,7 +35,6 @@
#include "hw/boards.h"
#include "hw/loader.h"
#include "hw/sysbus.h"
-#include "hw/char/serial.h"
#include "hw/misc/unimp.h"
#include "target/riscv/cpu.h"
#include "hw/riscv/riscv_hart.h"
@@ -47,7 +46,7 @@
#include "hw/misc/sifive_e_prci.h"
#include "hw/misc/sifive_e_aon.h"
#include "chardev/char.h"
-#include "sysemu/sysemu.h"
+#include "system/system.h"
static const MemMapEntry sifive_e_memmap[] = {
[SIFIVE_E_DEV_DEBUG] = { 0x0, 0x1000 },
@@ -79,6 +78,7 @@ static void sifive_e_machine_init(MachineState *machine)
SiFiveEState *s = RISCV_E_MACHINE(machine);
MemoryRegion *sys_mem = get_system_memory();
int i;
+ RISCVBootInfo boot_info;
if (machine->ram_size != mc->default_ram_size) {
char *sz = size_to_str(mc->default_ram_size);
@@ -114,8 +114,9 @@ static void sifive_e_machine_init(MachineState *machine)
rom_add_blob_fixed_as("mrom.reset", reset_vec, sizeof(reset_vec),
memmap[SIFIVE_E_DEV_MROM].base, &address_space_memory);
+ riscv_boot_info_init(&boot_info, &s->soc.cpus);
if (machine->kernel_filename) {
- riscv_load_kernel(machine, &s->soc.cpus,
+ riscv_load_kernel(machine, &boot_info,
memmap[SIFIVE_E_DEV_DTIM].base,
false, NULL);
}
@@ -142,7 +143,7 @@ static void sifive_e_machine_instance_init(Object *obj)
s->revb = false;
}
-static void sifive_e_machine_class_init(ObjectClass *oc, void *data)
+static void sifive_e_machine_class_init(ObjectClass *oc, const void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
@@ -283,7 +284,7 @@ static void sifive_e_soc_realize(DeviceState *dev, Error **errp)
&s->xip_mem);
}
-static void sifive_e_soc_class_init(ObjectClass *oc, void *data)
+static void sifive_e_soc_class_init(ObjectClass *oc, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(oc);
diff --git a/hw/riscv/sifive_u.c b/hw/riscv/sifive_u.c
index af5f923..d69f942 100644
--- a/hw/riscv/sifive_u.c
+++ b/hw/riscv/sifive_u.c
@@ -43,7 +43,6 @@
#include "hw/irq.h"
#include "hw/loader.h"
#include "hw/sysbus.h"
-#include "hw/char/serial.h"
#include "hw/cpu/cluster.h"
#include "hw/misc/unimp.h"
#include "hw/sd/sd.h"
@@ -57,9 +56,9 @@
#include "hw/intc/sifive_plic.h"
#include "chardev/char.h"
#include "net/eth.h"
-#include "sysemu/device_tree.h"
-#include "sysemu/runstate.h"
-#include "sysemu/sysemu.h"
+#include "system/device_tree.h"
+#include "system/runstate.h"
+#include "system/system.h"
#include <libfdt.h>
@@ -515,17 +514,19 @@ static void sifive_u_machine_init(MachineState *machine)
SiFiveUState *s = RISCV_U_MACHINE(machine);
MemoryRegion *system_memory = get_system_memory();
MemoryRegion *flash0 = g_new(MemoryRegion, 1);
- target_ulong start_addr = memmap[SIFIVE_U_DEV_DRAM].base;
+ hwaddr start_addr = memmap[SIFIVE_U_DEV_DRAM].base;
target_ulong firmware_end_addr, kernel_start_addr;
const char *firmware_name;
uint32_t start_addr_hi32 = 0x00000000;
+ uint32_t fdt_load_addr_hi32 = 0x00000000;
int i;
- uint32_t fdt_load_addr;
+ uint64_t fdt_load_addr;
uint64_t kernel_entry;
DriveInfo *dinfo;
BlockBackend *blk;
DeviceState *flash_dev, *sd_dev, *card_dev;
qemu_irq flash_cs, sd_cs;
+ RISCVBootInfo boot_info;
/* Initialize SoC */
object_initialize_child(OBJECT(machine), "soc", &s->soc, TYPE_RISCV_U_SOC);
@@ -589,14 +590,15 @@ static void sifive_u_machine_init(MachineState *machine)
firmware_name = riscv_default_firmware_name(&s->soc.u_cpus);
firmware_end_addr = riscv_find_and_load_firmware(machine, firmware_name,
- start_addr, NULL);
+ &start_addr, NULL);
+ riscv_boot_info_init(&boot_info, &s->soc.u_cpus);
if (machine->kernel_filename) {
- kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc.u_cpus,
+ kernel_start_addr = riscv_calc_kernel_start_addr(&boot_info,
firmware_end_addr);
-
- kernel_entry = riscv_load_kernel(machine, &s->soc.u_cpus,
- kernel_start_addr, true, NULL);
+ riscv_load_kernel(machine, &boot_info, kernel_start_addr,
+ true, NULL);
+ kernel_entry = boot_info.image_low_addr;
} else {
/*
* If dynamic firmware is used, it doesn't know where is the next mode
@@ -607,11 +609,12 @@ static void sifive_u_machine_init(MachineState *machine)
fdt_load_addr = riscv_compute_fdt_addr(memmap[SIFIVE_U_DEV_DRAM].base,
memmap[SIFIVE_U_DEV_DRAM].size,
- machine);
+ machine, &boot_info);
riscv_load_fdt(fdt_load_addr, machine->fdt);
if (!riscv_is_32bit(&s->soc.u_cpus)) {
start_addr_hi32 = (uint64_t)start_addr >> 32;
+ fdt_load_addr_hi32 = fdt_load_addr >> 32;
}
/* reset vector */
@@ -626,7 +629,7 @@ static void sifive_u_machine_init(MachineState *machine)
start_addr, /* start: .dword */
start_addr_hi32,
fdt_load_addr, /* fdt_laddr: .dword */
- 0x00000000,
+ fdt_load_addr_hi32,
0x00000000,
/* fw_dyn: */
};
@@ -646,7 +649,8 @@ static void sifive_u_machine_init(MachineState *machine)
rom_add_blob_fixed_as("mrom.reset", reset_vec, sizeof(reset_vec),
memmap[SIFIVE_U_DEV_MROM].base, &address_space_memory);
- riscv_rom_copy_firmware_info(machine, memmap[SIFIVE_U_DEV_MROM].base,
+ riscv_rom_copy_firmware_info(machine, &s->soc.u_cpus,
+ memmap[SIFIVE_U_DEV_MROM].base,
memmap[SIFIVE_U_DEV_MROM].size,
sizeof(reset_vec), kernel_entry);
@@ -709,7 +713,7 @@ static void sifive_u_machine_instance_init(Object *obj)
object_property_set_description(obj, "serial", "Board serial number");
}
-static void sifive_u_machine_class_init(ObjectClass *oc, void *data)
+static void sifive_u_machine_class_init(ObjectClass *oc, const void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
@@ -720,6 +724,7 @@ static void sifive_u_machine_class_init(ObjectClass *oc, void *data)
mc->default_cpu_type = SIFIVE_U_CPU;
mc->default_cpus = mc->min_cpus;
mc->default_ram_id = "riscv.sifive.u.ram";
+ mc->auto_create_sdcard = true;
object_class_property_add_bool(oc, "start-in-flash",
sifive_u_machine_get_start_in_flash,
@@ -936,13 +941,12 @@ static void sifive_u_soc_realize(DeviceState *dev, Error **errp)
qdev_get_gpio_in(DEVICE(s->plic), SIFIVE_U_QSPI2_IRQ));
}
-static Property sifive_u_soc_props[] = {
+static const Property sifive_u_soc_props[] = {
DEFINE_PROP_UINT32("serial", SiFiveUSoCState, serial, OTP_SERIAL),
DEFINE_PROP_STRING("cpu-type", SiFiveUSoCState, cpu_type),
- DEFINE_PROP_END_OF_LIST()
};
-static void sifive_u_soc_class_init(ObjectClass *oc, void *data)
+static void sifive_u_soc_class_init(ObjectClass *oc, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(oc);
diff --git a/hw/riscv/spike.c b/hw/riscv/spike.c
index 6407439..641aae8 100644
--- a/hw/riscv/spike.c
+++ b/hw/riscv/spike.c
@@ -36,8 +36,8 @@
#include "hw/char/riscv_htif.h"
#include "hw/intc/riscv_aclint.h"
#include "chardev/char.h"
-#include "sysemu/device_tree.h"
-#include "sysemu/sysemu.h"
+#include "system/device_tree.h"
+#include "system/system.h"
#include <libfdt.h>
@@ -198,13 +198,15 @@ static void spike_board_init(MachineState *machine)
MemoryRegion *system_memory = get_system_memory();
MemoryRegion *mask_rom = g_new(MemoryRegion, 1);
target_ulong firmware_end_addr = memmap[SPIKE_DRAM].base;
+ hwaddr firmware_load_addr = memmap[SPIKE_DRAM].base;
target_ulong kernel_start_addr;
char *firmware_name;
- uint32_t fdt_load_addr;
+ uint64_t fdt_load_addr;
uint64_t kernel_entry;
char *soc_name;
int i, base_hartid, hart_count;
bool htif_custom_base = false;
+ RISCVBootInfo boot_info;
/* Check socket count limit */
if (SPIKE_SOCKETS_MAX < riscv_socket_count(machine)) {
@@ -290,7 +292,7 @@ static void spike_board_init(MachineState *machine)
/* Load firmware */
if (firmware_name) {
firmware_end_addr = riscv_load_firmware(firmware_name,
- memmap[SPIKE_DRAM].base,
+ &firmware_load_addr,
htif_symbol_callback);
g_free(firmware_name);
}
@@ -299,13 +301,14 @@ static void spike_board_init(MachineState *machine)
create_fdt(s, memmap, riscv_is_32bit(&s->soc[0]), htif_custom_base);
/* Load kernel */
+ riscv_boot_info_init(&boot_info, &s->soc[0]);
if (machine->kernel_filename) {
- kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0],
+ kernel_start_addr = riscv_calc_kernel_start_addr(&boot_info,
firmware_end_addr);
- kernel_entry = riscv_load_kernel(machine, &s->soc[0],
- kernel_start_addr,
- true, htif_symbol_callback);
+ riscv_load_kernel(machine, &boot_info, kernel_start_addr,
+ true, htif_symbol_callback);
+ kernel_entry = boot_info.image_low_addr;
} else {
/*
* If dynamic firmware is used, it doesn't know where is the next mode
@@ -316,11 +319,11 @@ static void spike_board_init(MachineState *machine)
fdt_load_addr = riscv_compute_fdt_addr(memmap[SPIKE_DRAM].base,
memmap[SPIKE_DRAM].size,
- machine);
+ machine, &boot_info);
riscv_load_fdt(fdt_load_addr, machine->fdt);
/* load the reset vector */
- riscv_setup_rom_reset_vec(machine, &s->soc[0], memmap[SPIKE_DRAM].base,
+ riscv_setup_rom_reset_vec(machine, &s->soc[0], firmware_load_addr,
memmap[SPIKE_MROM].base,
memmap[SPIKE_MROM].size, kernel_entry,
fdt_load_addr);
@@ -339,7 +342,7 @@ static void spike_machine_instance_init(Object *obj)
{
}
-static void spike_machine_class_init(ObjectClass *oc, void *data)
+static void spike_machine_class_init(ObjectClass *oc, const void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
diff --git a/hw/riscv/trace-events b/hw/riscv/trace-events
new file mode 100644
index 0000000..b50b14a
--- /dev/null
+++ b/hw/riscv/trace-events
@@ -0,0 +1,26 @@
+# See documentation at docs/devel/tracing.rst
+
+# riscv-iommu.c
+riscv_iommu_new(const char *id, unsigned b, unsigned d, unsigned f) "%s: device attached %04x:%02x.%d"
+riscv_iommu_flt(const char *id, unsigned b, unsigned d, unsigned f, uint64_t reason, uint64_t iova) "%s: fault %04x:%02x.%u reason: 0x%"PRIx64" iova: 0x%"PRIx64
+riscv_iommu_pri(const char *id, unsigned b, unsigned d, unsigned f, uint64_t iova) "%s: page request %04x:%02x.%u iova: 0x%"PRIx64
+riscv_iommu_dma(const char *id, unsigned b, unsigned d, unsigned f, unsigned pasid, const char *dir, uint64_t iova, uint64_t phys) "%s: translate %04x:%02x.%u #%u %s 0x%"PRIx64" -> 0x%"PRIx64
+riscv_iommu_msi(const char *id, unsigned b, unsigned d, unsigned f, uint64_t iova, uint64_t phys) "%s: translate %04x:%02x.%u MSI 0x%"PRIx64" -> 0x%"PRIx64
+riscv_iommu_mrif_notification(const char *id, uint32_t nid, uint64_t phys) "%s: sent MRIF notification 0x%x to 0x%"PRIx64
+riscv_iommu_cmd(const char *id, uint64_t l, uint64_t u) "%s: command 0x%"PRIx64" 0x%"PRIx64
+riscv_iommu_notifier_add(const char *id) "%s: dev-iotlb notifier added"
+riscv_iommu_notifier_del(const char *id) "%s: dev-iotlb notifier removed"
+riscv_iommu_notify_int_vector(uint32_t cause, uint32_t vector) "Interrupt cause 0x%x sent via vector 0x%x"
+riscv_iommu_icvec_write(uint32_t orig, uint32_t actual) "ICVEC write: incoming 0x%x actual 0x%x"
+riscv_iommu_ats(const char *id, unsigned b, unsigned d, unsigned f, uint64_t iova) "%s: translate request %04x:%02x.%u iova: 0x%"PRIx64
+riscv_iommu_ats_inval(const char *id) "%s: dev-iotlb invalidate"
+riscv_iommu_ats_prgr(const char *id) "%s: dev-iotlb page request group response"
+riscv_iommu_sys_irq_sent(uint32_t vector) "IRQ sent to vector %u"
+riscv_iommu_sys_msi_sent(uint32_t vector, uint64_t msi_addr, uint32_t msi_data, uint32_t result) "MSI sent to vector %u msi_addr 0x%"PRIx64" msi_data 0x%x result %u"
+riscv_iommu_sys_reset_hold(int reset_type) "reset type %d"
+riscv_iommu_pci_reset_hold(int reset_type) "reset type %d"
+riscv_iommu_hpm_read(uint64_t cycle, uint32_t inhibit, uint64_t ctr_prev, uint64_t ctr_val) "cycle 0x%"PRIx64" inhibit 0x%x ctr_prev 0x%"PRIx64" ctr_val 0x%"PRIx64
+riscv_iommu_hpm_incr_ctr(uint64_t cntr_val) "cntr_val 0x%"PRIx64
+riscv_iommu_hpm_iocntinh_cy(bool prev_cy_inh) "prev_cy_inh %d"
+riscv_iommu_hpm_cycle_write(uint32_t ovf, uint64_t val) "ovf 0x%x val 0x%"PRIx64
+riscv_iommu_hpm_evt_write(uint32_t ctr_idx, uint32_t ovf, uint64_t val) "ctr_idx 0x%x ovf 0x%x val 0x%"PRIx64
diff --git a/hw/riscv/trace.h b/hw/riscv/trace.h
new file mode 100644
index 0000000..8c0e3ca
--- /dev/null
+++ b/hw/riscv/trace.h
@@ -0,0 +1 @@
+#include "trace/trace-hw_riscv.h"
diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c
index 0925528..8b5683d 100644
--- a/hw/riscv/virt-acpi-build.c
+++ b/hw/riscv/virt-acpi-build.c
@@ -38,7 +38,7 @@
#include "migration/vmstate.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
-#include "sysemu/reset.h"
+#include "system/reset.h"
#define ACPI_BUILD_TABLE_SIZE 0x20000
#define ACPI_BUILD_INTC_ID(socket, index) ((socket << 24) | (index))
@@ -141,12 +141,36 @@ static void acpi_dsdt_add_cpus(Aml *scope, RISCVVirtState *s)
}
}
+static void acpi_dsdt_add_plic_aplic(Aml *scope, uint8_t socket_count,
+ uint64_t mmio_base, uint64_t mmio_size,
+ const char *hid)
+{
+ uint64_t plic_aplic_addr;
+ uint32_t gsi_base;
+ uint8_t socket;
+
+ for (socket = 0; socket < socket_count; socket++) {
+ plic_aplic_addr = mmio_base + mmio_size * socket;
+ gsi_base = VIRT_IRQCHIP_NUM_SOURCES * socket;
+ Aml *dev = aml_device("IC%.02X", socket);
+ aml_append(dev, aml_name_decl("_HID", aml_string("%s", hid)));
+ aml_append(dev, aml_name_decl("_UID", aml_int(socket)));
+ aml_append(dev, aml_name_decl("_GSB", aml_int(gsi_base)));
+
+ Aml *crs = aml_resource_template();
+ aml_append(crs, aml_memory32_fixed(plic_aplic_addr, mmio_size,
+ AML_READ_WRITE));
+ aml_append(dev, aml_name_decl("_CRS", crs));
+ aml_append(scope, dev);
+ }
+}
+
static void
acpi_dsdt_add_uart(Aml *scope, const MemMapEntry *uart_memmap,
uint32_t uart_irq)
{
Aml *dev = aml_device("COM0");
- aml_append(dev, aml_name_decl("_HID", aml_string("PNP0501")));
+ aml_append(dev, aml_name_decl("_HID", aml_string("RSCV0003")));
aml_append(dev, aml_name_decl("_UID", aml_int(0)));
Aml *crs = aml_resource_template();
@@ -175,15 +199,42 @@ acpi_dsdt_add_uart(Aml *scope, const MemMapEntry *uart_memmap,
}
/*
+ * Add DSDT entry for the IOMMU platform device.
+ * ACPI ID for IOMMU is defined in the section 6.2 of RISC-V BRS spec.
+ * https://github.com/riscv-non-isa/riscv-brs/releases/download/v0.8/riscv-brs-spec.pdf
+ */
+static void acpi_dsdt_add_iommu_sys(Aml *scope, const MemMapEntry *iommu_memmap,
+ uint32_t iommu_irq)
+{
+ uint32_t i;
+
+ Aml *dev = aml_device("IMU0");
+ aml_append(dev, aml_name_decl("_HID", aml_string("RSCV0004")));
+ aml_append(dev, aml_name_decl("_UID", aml_int(0)));
+
+ Aml *crs = aml_resource_template();
+ aml_append(crs, aml_memory32_fixed(iommu_memmap->base,
+ iommu_memmap->size, AML_READ_WRITE));
+ for (i = iommu_irq; i < iommu_irq + 4; i++) {
+ aml_append(crs, aml_interrupt(AML_CONSUMER, AML_EDGE, AML_ACTIVE_LOW,
+ AML_EXCLUSIVE, &i, 1));
+ }
+
+ aml_append(dev, aml_name_decl("_CRS", crs));
+ aml_append(scope, dev);
+}
+
+/*
* Serial Port Console Redirection Table (SPCR)
- * Rev: 1.07
+ * Rev: 1.10
*/
static void
spcr_setup(GArray *table_data, BIOSLinker *linker, RISCVVirtState *s)
{
+ const char name[] = ".";
AcpiSpcrData serial = {
- .interface_type = 0, /* 16550 compatible */
+ .interface_type = 0x12, /* 16550 compatible */
.base_addr.id = AML_AS_SYSTEM_MEMORY,
.base_addr.width = 32,
.base_addr.offset = 0,
@@ -205,9 +256,14 @@ spcr_setup(GArray *table_data, BIOSLinker *linker, RISCVVirtState *s)
.pci_function = 0,
.pci_flags = 0,
.pci_segment = 0,
+ .uart_clk_freq = 0,
+ .precise_baudrate = 0,
+ .namespace_string_length = sizeof(name),
+ .namespace_string_offset = 88,
};
- build_spcr(table_data, linker, &serial, 2, s->oem_id, s->oem_table_id);
+ build_spcr(table_data, linker, &serial, 4, s->oem_id, s->oem_table_id,
+ name);
}
/* RHCT Node[N] starts at offset 56 */
@@ -231,7 +287,7 @@ static void build_rhct(GArray *table_data,
uint32_t isa_offset, num_rhct_nodes, cmo_offset = 0;
RISCVCPU *cpu = &s->soc[0].harts[0];
uint32_t mmu_offset = 0;
- uint8_t satp_mode_max;
+ bool rv32 = riscv_cpu_is_32bit(cpu);
g_autofree char *isa = NULL;
AcpiTable table = { .sig = "RHCT", .rev = 1, .oem_id = s->oem_id,
@@ -251,7 +307,7 @@ static void build_rhct(GArray *table_data,
num_rhct_nodes++;
}
- if (cpu->cfg.satp_mode.supported != 0) {
+ if (!rv32 && cpu->cfg.max_satp_mode >= VM_1_10_SV39) {
num_rhct_nodes++;
}
@@ -311,22 +367,21 @@ static void build_rhct(GArray *table_data,
}
/* MMU node structure */
- if (cpu->cfg.satp_mode.supported != 0) {
- satp_mode_max = satp_mode_max_from_map(cpu->cfg.satp_mode.map);
+ if (!rv32 && cpu->cfg.max_satp_mode >= VM_1_10_SV39) {
mmu_offset = table_data->len - table.table_offset;
build_append_int_noprefix(table_data, 2, 2); /* Type */
build_append_int_noprefix(table_data, 8, 2); /* Length */
build_append_int_noprefix(table_data, 0x1, 2); /* Revision */
build_append_int_noprefix(table_data, 0, 1); /* Reserved */
/* MMU Type */
- if (satp_mode_max == VM_1_10_SV57) {
+ if (cpu->cfg.max_satp_mode == VM_1_10_SV57) {
build_append_int_noprefix(table_data, 2, 1); /* Sv57 */
- } else if (satp_mode_max == VM_1_10_SV48) {
+ } else if (cpu->cfg.max_satp_mode == VM_1_10_SV48) {
build_append_int_noprefix(table_data, 1, 1); /* Sv48 */
- } else if (satp_mode_max == VM_1_10_SV39) {
+ } else if (cpu->cfg.max_satp_mode == VM_1_10_SV39) {
build_append_int_noprefix(table_data, 0, 1); /* Sv39 */
} else {
- assert(1);
+ g_assert_not_reached();
}
}
@@ -411,7 +466,18 @@ static void build_dsdt(GArray *table_data,
socket_count = riscv_socket_count(ms);
+ if (s->aia_type == VIRT_AIA_TYPE_NONE) {
+ acpi_dsdt_add_plic_aplic(scope, socket_count, memmap[VIRT_PLIC].base,
+ memmap[VIRT_PLIC].size, "RSCV0001");
+ } else {
+ acpi_dsdt_add_plic_aplic(scope, socket_count, memmap[VIRT_APLIC_S].base,
+ memmap[VIRT_APLIC_S].size, "RSCV0002");
+ }
+
acpi_dsdt_add_uart(scope, &memmap[VIRT_UART0], UART0_IRQ);
+ if (virt_is_iommu_sys_enabled(s)) {
+ acpi_dsdt_add_iommu_sys(scope, &memmap[VIRT_IOMMU_SYS], IOMMU_SYS_IRQ);
+ }
if (socket_count == 1) {
virtio_acpi_dsdt_add(scope, memmap[VIRT_VIRTIO].base,
@@ -564,6 +630,187 @@ static void build_madt(GArray *table_data,
acpi_table_end(linker, &table);
}
+#define ID_MAPPING_ENTRY_SIZE 20
+#define IOMMU_ENTRY_SIZE 40
+#define RISCV_INTERRUPT_WIRE_OFFSSET 40
+#define ROOT_COMPLEX_ENTRY_SIZE 20
+#define RIMT_NODE_OFFSET 48
+
+/*
+ * ID Mapping Structure
+ */
+static void build_rimt_id_mapping(GArray *table_data, uint32_t source_id_base,
+ uint32_t num_ids, uint32_t dest_id_base)
+{
+ /* Source ID Base */
+ build_append_int_noprefix(table_data, source_id_base, 4);
+ /* Number of IDs */
+ build_append_int_noprefix(table_data, num_ids, 4);
+ /* Destination Device ID Base */
+ build_append_int_noprefix(table_data, source_id_base, 4);
+ /* Destination IOMMU Offset */
+ build_append_int_noprefix(table_data, dest_id_base, 4);
+ /* Flags */
+ build_append_int_noprefix(table_data, 0, 4);
+}
+
+struct AcpiRimtIdMapping {
+ uint32_t source_id_base;
+ uint32_t num_ids;
+};
+typedef struct AcpiRimtIdMapping AcpiRimtIdMapping;
+
+/* Build the rimt ID mapping to IOMMU for a given PCI host bridge */
+static int rimt_host_bridges(Object *obj, void *opaque)
+{
+ GArray *idmap_blob = opaque;
+
+ if (object_dynamic_cast(obj, TYPE_PCI_HOST_BRIDGE)) {
+ PCIBus *bus = PCI_HOST_BRIDGE(obj)->bus;
+
+ if (bus && !pci_bus_bypass_iommu(bus)) {
+ int min_bus, max_bus;
+
+ pci_bus_range(bus, &min_bus, &max_bus);
+
+ AcpiRimtIdMapping idmap = {
+ .source_id_base = min_bus << 8,
+ .num_ids = (max_bus - min_bus + 1) << 8,
+ };
+ g_array_append_val(idmap_blob, idmap);
+ }
+ }
+
+ return 0;
+}
+
+static int rimt_idmap_compare(gconstpointer a, gconstpointer b)
+{
+ AcpiRimtIdMapping *idmap_a = (AcpiRimtIdMapping *)a;
+ AcpiRimtIdMapping *idmap_b = (AcpiRimtIdMapping *)b;
+
+ return idmap_a->source_id_base - idmap_b->source_id_base;
+}
+
+/*
+ * RISC-V IO Mapping Table (RIMT)
+ * https://github.com/riscv-non-isa/riscv-acpi-rimt/releases/download/v0.99/rimt-spec.pdf
+ */
+static void build_rimt(GArray *table_data, BIOSLinker *linker,
+ RISCVVirtState *s)
+{
+ int i, nb_nodes, rc_mapping_count;
+ size_t node_size, iommu_offset = 0;
+ uint32_t id = 0;
+ g_autoptr(GArray) iommu_idmaps = g_array_new(false, true,
+ sizeof(AcpiRimtIdMapping));
+
+ AcpiTable table = { .sig = "RIMT", .rev = 1, .oem_id = s->oem_id,
+ .oem_table_id = s->oem_table_id };
+
+ acpi_table_begin(&table, table_data);
+
+ object_child_foreach_recursive(object_get_root(),
+ rimt_host_bridges, iommu_idmaps);
+
+ /* Sort the ID mapping by Source ID Base*/
+ g_array_sort(iommu_idmaps, rimt_idmap_compare);
+
+ nb_nodes = 2; /* RC, IOMMU */
+ rc_mapping_count = iommu_idmaps->len;
+ /* Number of RIMT Nodes */
+ build_append_int_noprefix(table_data, nb_nodes, 4);
+
+ /* Offset to Array of RIMT Nodes */
+ build_append_int_noprefix(table_data, RIMT_NODE_OFFSET, 4);
+ build_append_int_noprefix(table_data, 0, 4); /* Reserved */
+
+ iommu_offset = table_data->len - table.table_offset;
+ /* IOMMU Device Structure */
+ build_append_int_noprefix(table_data, 0, 1); /* Type - IOMMU*/
+ build_append_int_noprefix(table_data, 1, 1); /* Revision */
+ node_size = IOMMU_ENTRY_SIZE;
+ build_append_int_noprefix(table_data, node_size, 2); /* Length */
+ build_append_int_noprefix(table_data, 0, 2); /* Reserved */
+ build_append_int_noprefix(table_data, id++, 2); /* ID */
+ if (virt_is_iommu_sys_enabled(s)) {
+ /* Hardware ID */
+ build_append_int_noprefix(table_data, 'R', 1);
+ build_append_int_noprefix(table_data, 'S', 1);
+ build_append_int_noprefix(table_data, 'C', 1);
+ build_append_int_noprefix(table_data, 'V', 1);
+ build_append_int_noprefix(table_data, '0', 1);
+ build_append_int_noprefix(table_data, '0', 1);
+ build_append_int_noprefix(table_data, '0', 1);
+ build_append_int_noprefix(table_data, '4', 1);
+ /* Base Address */
+ build_append_int_noprefix(table_data,
+ s->memmap[VIRT_IOMMU_SYS].base, 8);
+ build_append_int_noprefix(table_data, 0, 4); /* Flags */
+ } else {
+ /* Hardware ID */
+ build_append_int_noprefix(table_data, '0', 1);
+ build_append_int_noprefix(table_data, '0', 1);
+ build_append_int_noprefix(table_data, '1', 1);
+ build_append_int_noprefix(table_data, '0', 1);
+ build_append_int_noprefix(table_data, '0', 1);
+ build_append_int_noprefix(table_data, '0', 1);
+ build_append_int_noprefix(table_data, '1', 1);
+ build_append_int_noprefix(table_data, '4', 1);
+
+ build_append_int_noprefix(table_data, 0, 8); /* Base Address */
+ build_append_int_noprefix(table_data, 1, 4); /* Flags */
+ }
+
+ build_append_int_noprefix(table_data, 0, 4); /* Proximity Domain */
+ build_append_int_noprefix(table_data, 0, 2); /* PCI Segment number */
+ /* PCIe B/D/F */
+ if (virt_is_iommu_sys_enabled(s)) {
+ build_append_int_noprefix(table_data, 0, 2);
+ } else {
+ build_append_int_noprefix(table_data, s->pci_iommu_bdf, 2);
+ }
+ /* Number of interrupt wires */
+ build_append_int_noprefix(table_data, 0, 2);
+ /* Interrupt wire array offset */
+ build_append_int_noprefix(table_data, RISCV_INTERRUPT_WIRE_OFFSSET, 2);
+
+ /* PCIe Root Complex Node */
+ build_append_int_noprefix(table_data, 1, 1); /* Type */
+ build_append_int_noprefix(table_data, 1, 1); /* Revision */
+ node_size = ROOT_COMPLEX_ENTRY_SIZE +
+ ID_MAPPING_ENTRY_SIZE * rc_mapping_count;
+ build_append_int_noprefix(table_data, node_size, 2); /* Length */
+ build_append_int_noprefix(table_data, 0, 2); /* Reserved */
+ build_append_int_noprefix(table_data, id++, 2); /* ID */
+ build_append_int_noprefix(table_data, 0, 4); /* Flags */
+ build_append_int_noprefix(table_data, 0, 2); /* Reserved */
+ /* PCI Segment number */
+ build_append_int_noprefix(table_data, 0, 2);
+ /* ID mapping array offset */
+ build_append_int_noprefix(table_data, ROOT_COMPLEX_ENTRY_SIZE, 2);
+ /* Number of ID mappings */
+ build_append_int_noprefix(table_data, rc_mapping_count, 2);
+
+ /* Output Reference */
+ AcpiRimtIdMapping *range;
+
+ /* ID mapping array */
+ for (i = 0; i < iommu_idmaps->len; i++) {
+ range = &g_array_index(iommu_idmaps, AcpiRimtIdMapping, i);
+ if (virt_is_iommu_sys_enabled(s)) {
+ range->source_id_base = 0;
+ } else {
+ range->source_id_base = s->pci_iommu_bdf + 1;
+ }
+ range->num_ids = 0xffff - s->pci_iommu_bdf;
+ build_rimt_id_mapping(table_data, range->source_id_base,
+ range->num_ids, iommu_offset);
+ }
+
+ acpi_table_end(linker, &table);
+}
+
/*
* ACPI spec, Revision 6.5+
* 5.2.16 System Resource Affinity Table (SRAT)
@@ -641,6 +888,11 @@ static void virt_acpi_build(RISCVVirtState *s, AcpiBuildTables *tables)
acpi_add_table(table_offsets, tables_blob);
build_rhct(tables_blob, tables->linker, s);
+ if (virt_is_iommu_sys_enabled(s) || s->pci_iommu_bdf) {
+ acpi_add_table(table_offsets, tables_blob);
+ build_rimt(tables_blob, tables->linker, s);
+ }
+
acpi_add_table(table_offsets, tables_blob);
spcr_setup(tables_blob, tables->linker, s);
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 9981e0f..cf280a9 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -27,11 +27,13 @@
#include "hw/loader.h"
#include "hw/sysbus.h"
#include "hw/qdev-properties.h"
-#include "hw/char/serial.h"
+#include "hw/char/serial-mm.h"
#include "target/riscv/cpu.h"
#include "hw/core/sysbus-fdt.h"
#include "target/riscv/pmu.h"
#include "hw/riscv/riscv_hart.h"
+#include "hw/riscv/iommu.h"
+#include "hw/riscv/riscv-iommu-bits.h"
#include "hw/riscv/virt.h"
#include "hw/riscv/boot.h"
#include "hw/riscv/numa.h"
@@ -43,23 +45,33 @@
#include "hw/misc/sifive_test.h"
#include "hw/platform-bus.h"
#include "chardev/char.h"
-#include "sysemu/device_tree.h"
-#include "sysemu/sysemu.h"
-#include "sysemu/tcg.h"
-#include "sysemu/kvm.h"
-#include "sysemu/tpm.h"
-#include "sysemu/qtest.h"
+#include "system/device_tree.h"
+#include "system/system.h"
+#include "system/tcg.h"
+#include "system/kvm.h"
+#include "system/tpm.h"
+#include "system/qtest.h"
#include "hw/pci/pci.h"
#include "hw/pci-host/gpex.h"
#include "hw/display/ramfb.h"
#include "hw/acpi/aml-build.h"
#include "qapi/qapi-visit-common.h"
#include "hw/virtio/virtio-iommu.h"
+#include "hw/uefi/var-service-api.h"
/* KVM AIA only supports APLIC MSI. APLIC Wired is always emulated by QEMU. */
-static bool virt_use_kvm_aia(RISCVVirtState *s)
+static bool virt_use_kvm_aia_aplic_imsic(RISCVVirtAIAType aia_type)
{
- return kvm_irqchip_in_kernel() && s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC;
+ bool msimode = aia_type == VIRT_AIA_TYPE_APLIC_IMSIC;
+
+ return riscv_is_kvm_aia_aplic_imsic(msimode);
+}
+
+static bool virt_use_emulated_aplic(RISCVVirtAIAType aia_type)
+{
+ bool msimode = aia_type == VIRT_AIA_TYPE_APLIC_IMSIC;
+
+ return riscv_use_emulated_aplic(msimode);
}
static bool virt_aclint_allowed(void)
@@ -75,6 +87,7 @@ static const MemMapEntry virt_memmap[] = {
[VIRT_CLINT] = { 0x2000000, 0x10000 },
[VIRT_ACLINT_SSWI] = { 0x2F00000, 0x4000 },
[VIRT_PCIE_PIO] = { 0x3000000, 0x10000 },
+ [VIRT_IOMMU_SYS] = { 0x3010000, 0x1000 },
[VIRT_PLATFORM_BUS] = { 0x4000000, 0x2000000 },
[VIRT_PLIC] = { 0xc000000, VIRT_PLIC_SIZE(VIRT_CPUS_MAX * 2) },
[VIRT_APLIC_M] = { 0xc000000, APLIC_SIZE(VIRT_CPUS_MAX) },
@@ -153,8 +166,8 @@ static void virt_flash_map1(PFlashCFI01 *flash,
static void virt_flash_map(RISCVVirtState *s,
MemoryRegion *sysmem)
{
- hwaddr flashsize = virt_memmap[VIRT_FLASH].size / 2;
- hwaddr flashbase = virt_memmap[VIRT_FLASH].base;
+ hwaddr flashsize = s->memmap[VIRT_FLASH].size / 2;
+ hwaddr flashbase = s->memmap[VIRT_FLASH].base;
virt_flash_map1(s->flash[0], flashbase, flashsize,
sysmem);
@@ -167,7 +180,7 @@ static void create_pcie_irq_map(RISCVVirtState *s, void *fdt, char *nodename,
{
int pin, dev;
uint32_t irq_map_stride = 0;
- uint32_t full_irq_map[GPEX_NUM_IRQS * GPEX_NUM_IRQS *
+ uint32_t full_irq_map[PCI_NUM_PINS * PCI_NUM_PINS *
FDT_MAX_INT_MAP_WIDTH] = {};
uint32_t *irq_map = full_irq_map;
@@ -179,11 +192,11 @@ static void create_pcie_irq_map(RISCVVirtState *s, void *fdt, char *nodename,
* possible slot) seeing the interrupt-map-mask will allow the table
* to wrap to any number of devices.
*/
- for (dev = 0; dev < GPEX_NUM_IRQS; dev++) {
+ for (dev = 0; dev < PCI_NUM_PINS; dev++) {
int devfn = dev * 0x8;
- for (pin = 0; pin < GPEX_NUM_IRQS; pin++) {
- int irq_nr = PCIE_IRQ + ((pin + PCI_SLOT(devfn)) % GPEX_NUM_IRQS);
+ for (pin = 0; pin < PCI_NUM_PINS; pin++) {
+ int irq_nr = PCIE_IRQ + ((pin + PCI_SLOT(devfn)) % PCI_NUM_PINS);
int i = 0;
/* Fill PCI address cells */
@@ -209,7 +222,7 @@ static void create_pcie_irq_map(RISCVVirtState *s, void *fdt, char *nodename,
}
qemu_fdt_setprop(fdt, nodename, "interrupt-map", full_irq_map,
- GPEX_NUM_IRQS * GPEX_NUM_IRQS *
+ PCI_NUM_PINS * PCI_NUM_PINS *
irq_map_stride * sizeof(uint32_t));
qemu_fdt_setprop_cells(fdt, nodename, "interrupt-map-mask",
@@ -224,10 +237,10 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int socket,
uint32_t cpu_phandle;
MachineState *ms = MACHINE(s);
bool is_32_bit = riscv_is_32bit(&s->soc[0]);
- uint8_t satp_mode_max;
for (cpu = s->soc[socket].num_harts - 1; cpu >= 0; cpu--) {
RISCVCPU *cpu_ptr = &s->soc[socket].harts[cpu];
+ int8_t satp_mode_max = cpu_ptr->cfg.max_satp_mode;
g_autofree char *cpu_name = NULL;
g_autofree char *core_name = NULL;
g_autofree char *intc_name = NULL;
@@ -239,8 +252,7 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int socket,
s->soc[socket].hartid_base + cpu);
qemu_fdt_add_subnode(ms->fdt, cpu_name);
- if (cpu_ptr->cfg.satp_mode.supported != 0) {
- satp_mode_max = satp_mode_max_from_map(cpu_ptr->cfg.satp_mode.map);
+ if (satp_mode_max != -1) {
sv_name = g_strdup_printf("riscv,%s",
satp_mode_str(satp_mode_max, is_32_bit));
qemu_fdt_setprop_string(ms->fdt, cpu_name, "mmu-type", sv_name);
@@ -288,16 +300,16 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int socket,
}
}
-static void create_fdt_socket_memory(RISCVVirtState *s,
- const MemMapEntry *memmap, int socket)
+static void create_fdt_socket_memory(RISCVVirtState *s, int socket)
{
g_autofree char *mem_name = NULL;
- uint64_t addr, size;
+ hwaddr addr;
+ uint64_t size;
MachineState *ms = MACHINE(s);
- addr = memmap[VIRT_DRAM].base + riscv_socket_mem_offset(ms, socket);
+ addr = s->memmap[VIRT_DRAM].base + riscv_socket_mem_offset(ms, socket);
size = riscv_socket_mem_size(ms, socket);
- mem_name = g_strdup_printf("/memory@%lx", (long)addr);
+ mem_name = g_strdup_printf("/memory@%"HWADDR_PRIx, addr);
qemu_fdt_add_subnode(ms->fdt, mem_name);
qemu_fdt_setprop_cells(ms->fdt, mem_name, "reg",
addr >> 32, addr, size >> 32, size);
@@ -306,7 +318,7 @@ static void create_fdt_socket_memory(RISCVVirtState *s,
}
static void create_fdt_socket_clint(RISCVVirtState *s,
- const MemMapEntry *memmap, int socket,
+ int socket,
uint32_t *intc_phandles)
{
int cpu;
@@ -327,21 +339,22 @@ static void create_fdt_socket_clint(RISCVVirtState *s,
clint_cells[cpu * 4 + 3] = cpu_to_be32(IRQ_M_TIMER);
}
- clint_addr = memmap[VIRT_CLINT].base + (memmap[VIRT_CLINT].size * socket);
+ clint_addr = s->memmap[VIRT_CLINT].base +
+ (s->memmap[VIRT_CLINT].size * socket);
clint_name = g_strdup_printf("/soc/clint@%lx", clint_addr);
qemu_fdt_add_subnode(ms->fdt, clint_name);
qemu_fdt_setprop_string_array(ms->fdt, clint_name, "compatible",
(char **)&clint_compat,
ARRAY_SIZE(clint_compat));
qemu_fdt_setprop_cells(ms->fdt, clint_name, "reg",
- 0x0, clint_addr, 0x0, memmap[VIRT_CLINT].size);
+ 0x0, clint_addr, 0x0, s->memmap[VIRT_CLINT].size);
qemu_fdt_setprop(ms->fdt, clint_name, "interrupts-extended",
clint_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 4);
riscv_socket_fdt_write_id(ms, clint_name, socket);
}
static void create_fdt_socket_aclint(RISCVVirtState *s,
- const MemMapEntry *memmap, int socket,
+ int socket,
uint32_t *intc_phandles)
{
int cpu;
@@ -368,8 +381,10 @@ static void create_fdt_socket_aclint(RISCVVirtState *s,
aclint_cells_size = s->soc[socket].num_harts * sizeof(uint32_t) * 2;
if (s->aia_type != VIRT_AIA_TYPE_APLIC_IMSIC) {
- addr = memmap[VIRT_CLINT].base + (memmap[VIRT_CLINT].size * socket);
+ addr = s->memmap[VIRT_CLINT].base +
+ (s->memmap[VIRT_CLINT].size * socket);
name = g_strdup_printf("/soc/mswi@%lx", addr);
+
qemu_fdt_add_subnode(ms->fdt, name);
qemu_fdt_setprop_string(ms->fdt, name, "compatible",
"riscv,aclint-mswi");
@@ -384,13 +399,13 @@ static void create_fdt_socket_aclint(RISCVVirtState *s,
}
if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) {
- addr = memmap[VIRT_CLINT].base +
+ addr = s->memmap[VIRT_CLINT].base +
(RISCV_ACLINT_DEFAULT_MTIMER_SIZE * socket);
size = RISCV_ACLINT_DEFAULT_MTIMER_SIZE;
} else {
- addr = memmap[VIRT_CLINT].base + RISCV_ACLINT_SWI_SIZE +
- (memmap[VIRT_CLINT].size * socket);
- size = memmap[VIRT_CLINT].size - RISCV_ACLINT_SWI_SIZE;
+ addr = s->memmap[VIRT_CLINT].base + RISCV_ACLINT_SWI_SIZE +
+ (s->memmap[VIRT_CLINT].size * socket);
+ size = s->memmap[VIRT_CLINT].size - RISCV_ACLINT_SWI_SIZE;
}
name = g_strdup_printf("/soc/mtimer@%lx", addr);
qemu_fdt_add_subnode(ms->fdt, name);
@@ -407,14 +422,15 @@ static void create_fdt_socket_aclint(RISCVVirtState *s,
g_free(name);
if (s->aia_type != VIRT_AIA_TYPE_APLIC_IMSIC) {
- addr = memmap[VIRT_ACLINT_SSWI].base +
- (memmap[VIRT_ACLINT_SSWI].size * socket);
+ addr = s->memmap[VIRT_ACLINT_SSWI].base +
+ (s->memmap[VIRT_ACLINT_SSWI].size * socket);
+
name = g_strdup_printf("/soc/sswi@%lx", addr);
qemu_fdt_add_subnode(ms->fdt, name);
qemu_fdt_setprop_string(ms->fdt, name, "compatible",
"riscv,aclint-sswi");
qemu_fdt_setprop_cells(ms->fdt, name, "reg",
- 0x0, addr, 0x0, memmap[VIRT_ACLINT_SSWI].size);
+ 0x0, addr, 0x0, s->memmap[VIRT_ACLINT_SSWI].size);
qemu_fdt_setprop(ms->fdt, name, "interrupts-extended",
aclint_sswi_cells, aclint_cells_size);
qemu_fdt_setprop(ms->fdt, name, "interrupt-controller", NULL, 0);
@@ -425,7 +441,7 @@ static void create_fdt_socket_aclint(RISCVVirtState *s,
}
static void create_fdt_socket_plic(RISCVVirtState *s,
- const MemMapEntry *memmap, int socket,
+ int socket,
uint32_t *phandle, uint32_t *intc_phandles,
uint32_t *plic_phandles)
{
@@ -439,7 +455,8 @@ static void create_fdt_socket_plic(RISCVVirtState *s,
};
plic_phandles[socket] = (*phandle)++;
- plic_addr = memmap[VIRT_PLIC].base + (memmap[VIRT_PLIC].size * socket);
+ plic_addr = s->memmap[VIRT_PLIC].base +
+ (s->memmap[VIRT_PLIC].size * socket);
plic_name = g_strdup_printf("/soc/plic@%lx", plic_addr);
qemu_fdt_add_subnode(ms->fdt, plic_name);
qemu_fdt_setprop_cell(ms->fdt, plic_name,
@@ -478,7 +495,7 @@ static void create_fdt_socket_plic(RISCVVirtState *s,
}
qemu_fdt_setprop_cells(ms->fdt, plic_name, "reg",
- 0x0, plic_addr, 0x0, memmap[VIRT_PLIC].size);
+ 0x0, plic_addr, 0x0, s->memmap[VIRT_PLIC].size);
qemu_fdt_setprop_cell(ms->fdt, plic_name, "riscv,ndev",
VIRT_IRQCHIP_NUM_SOURCES - 1);
riscv_socket_fdt_write_id(ms, plic_name, socket);
@@ -487,8 +504,8 @@ static void create_fdt_socket_plic(RISCVVirtState *s,
if (!socket) {
platform_bus_add_all_fdt_nodes(ms->fdt, plic_name,
- memmap[VIRT_PLATFORM_BUS].base,
- memmap[VIRT_PLATFORM_BUS].size,
+ s->memmap[VIRT_PLATFORM_BUS].base,
+ s->memmap[VIRT_PLATFORM_BUS].size,
VIRT_PLATFORM_BUS_IRQ);
}
}
@@ -552,7 +569,6 @@ static void create_fdt_one_imsic(RISCVVirtState *s, hwaddr base_addr,
FDT_IMSIC_INT_CELLS);
qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller", NULL, 0);
qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller", NULL, 0);
- qemu_fdt_setprop_cell(ms->fdt, imsic_name, "#msi-cells", 0);
qemu_fdt_setprop(ms->fdt, imsic_name, "interrupts-extended",
imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2);
qemu_fdt_setprop(ms->fdt, imsic_name, "reg", imsic_regs,
@@ -576,7 +592,7 @@ static void create_fdt_one_imsic(RISCVVirtState *s, hwaddr base_addr,
qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", msi_phandle);
}
-static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap,
+static void create_fdt_imsic(RISCVVirtState *s,
uint32_t *phandle, uint32_t *intc_phandles,
uint32_t *msi_m_phandle, uint32_t *msi_s_phandle)
{
@@ -585,12 +601,12 @@ static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap,
if (!kvm_enabled()) {
/* M-level IMSIC node */
- create_fdt_one_imsic(s, memmap[VIRT_IMSIC_M].base, intc_phandles,
+ create_fdt_one_imsic(s, s->memmap[VIRT_IMSIC_M].base, intc_phandles,
*msi_m_phandle, true, 0);
}
/* S-level IMSIC node */
- create_fdt_one_imsic(s, memmap[VIRT_IMSIC_S].base, intc_phandles,
+ create_fdt_one_imsic(s, s->memmap[VIRT_IMSIC_S].base, intc_phandles,
*msi_s_phandle, false,
imsic_num_bits(s->aia_guests + 1));
@@ -667,7 +683,7 @@ static void create_fdt_one_aplic(RISCVVirtState *s, int socket,
}
static void create_fdt_socket_aplic(RISCVVirtState *s,
- const MemMapEntry *memmap, int socket,
+ int socket,
uint32_t msi_m_phandle,
uint32_t msi_s_phandle,
uint32_t *phandle,
@@ -684,18 +700,19 @@ static void create_fdt_socket_aplic(RISCVVirtState *s,
if (!kvm_enabled()) {
/* M-level APLIC node */
- aplic_addr = memmap[VIRT_APLIC_M].base +
- (memmap[VIRT_APLIC_M].size * socket);
- create_fdt_one_aplic(s, socket, aplic_addr, memmap[VIRT_APLIC_M].size,
+ aplic_addr = s->memmap[VIRT_APLIC_M].base +
+ (s->memmap[VIRT_APLIC_M].size * socket);
+ create_fdt_one_aplic(s, socket, aplic_addr,
+ s->memmap[VIRT_APLIC_M].size,
msi_m_phandle, intc_phandles,
aplic_m_phandle, aplic_s_phandle,
true, num_harts);
}
/* S-level APLIC node */
- aplic_addr = memmap[VIRT_APLIC_S].base +
- (memmap[VIRT_APLIC_S].size * socket);
- create_fdt_one_aplic(s, socket, aplic_addr, memmap[VIRT_APLIC_S].size,
+ aplic_addr = s->memmap[VIRT_APLIC_S].base +
+ (s->memmap[VIRT_APLIC_S].size * socket);
+ create_fdt_one_aplic(s, socket, aplic_addr, s->memmap[VIRT_APLIC_S].size,
msi_s_phandle, intc_phandles,
aplic_s_phandle, 0,
false, num_harts);
@@ -703,8 +720,8 @@ static void create_fdt_socket_aplic(RISCVVirtState *s,
if (!socket) {
g_autofree char *aplic_name = fdt_get_aplic_nodename(aplic_addr);
platform_bus_add_all_fdt_nodes(ms->fdt, aplic_name,
- memmap[VIRT_PLATFORM_BUS].base,
- memmap[VIRT_PLATFORM_BUS].size,
+ s->memmap[VIRT_PLATFORM_BUS].base,
+ s->memmap[VIRT_PLATFORM_BUS].size,
VIRT_PLATFORM_BUS_IRQ);
}
@@ -722,7 +739,7 @@ static void create_fdt_pmu(RISCVVirtState *s)
riscv_pmu_generate_fdt_node(ms->fdt, hart.pmu_avail_ctrs, pmu_name);
}
-static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap,
+static void create_fdt_sockets(RISCVVirtState *s,
uint32_t *phandle,
uint32_t *irq_mmio_phandle,
uint32_t *irq_pcie_phandle,
@@ -739,7 +756,7 @@ static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap,
qemu_fdt_add_subnode(ms->fdt, "/cpus");
qemu_fdt_setprop_cell(ms->fdt, "/cpus", "timebase-frequency",
kvm_enabled() ?
- kvm_riscv_get_timebase_frequency(first_cpu) :
+ kvm_riscv_get_timebase_frequency(&s->soc->harts[0]) :
RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ);
qemu_fdt_setprop_cell(ms->fdt, "/cpus", "#size-cells", 0x0);
qemu_fdt_setprop_cell(ms->fdt, "/cpus", "#address-cells", 0x1);
@@ -758,53 +775,54 @@ static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap,
create_fdt_socket_cpus(s, socket, clust_name, phandle,
&intc_phandles[phandle_pos]);
- create_fdt_socket_memory(s, memmap, socket);
+ create_fdt_socket_memory(s, socket);
if (virt_aclint_allowed() && s->have_aclint) {
- create_fdt_socket_aclint(s, memmap, socket,
+ create_fdt_socket_aclint(s, socket,
&intc_phandles[phandle_pos]);
} else if (tcg_enabled()) {
- create_fdt_socket_clint(s, memmap, socket,
+ create_fdt_socket_clint(s, socket,
&intc_phandles[phandle_pos]);
}
}
if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) {
- create_fdt_imsic(s, memmap, phandle, intc_phandles,
- &msi_m_phandle, &msi_s_phandle);
+ create_fdt_imsic(s, phandle, intc_phandles,
+ &msi_m_phandle, &msi_s_phandle);
*msi_pcie_phandle = msi_s_phandle;
}
- /* KVM AIA only has one APLIC instance */
- if (kvm_enabled() && virt_use_kvm_aia(s)) {
- create_fdt_socket_aplic(s, memmap, 0,
+ /*
+ * With KVM AIA aplic-imsic, using an irqchip without split
+ * mode, we'll use only one APLIC instance.
+ */
+ if (!virt_use_emulated_aplic(s->aia_type)) {
+ create_fdt_socket_aplic(s, 0,
msi_m_phandle, msi_s_phandle, phandle,
&intc_phandles[0], xplic_phandles,
ms->smp.cpus);
+
+ *irq_mmio_phandle = xplic_phandles[0];
+ *irq_virtio_phandle = xplic_phandles[0];
+ *irq_pcie_phandle = xplic_phandles[0];
} else {
phandle_pos = ms->smp.cpus;
for (socket = (socket_count - 1); socket >= 0; socket--) {
phandle_pos -= s->soc[socket].num_harts;
if (s->aia_type == VIRT_AIA_TYPE_NONE) {
- create_fdt_socket_plic(s, memmap, socket, phandle,
+ create_fdt_socket_plic(s, socket, phandle,
&intc_phandles[phandle_pos],
xplic_phandles);
} else {
- create_fdt_socket_aplic(s, memmap, socket,
+ create_fdt_socket_aplic(s, socket,
msi_m_phandle, msi_s_phandle, phandle,
&intc_phandles[phandle_pos],
xplic_phandles,
s->soc[socket].num_harts);
}
}
- }
- if (kvm_enabled() && virt_use_kvm_aia(s)) {
- *irq_mmio_phandle = xplic_phandles[0];
- *irq_virtio_phandle = xplic_phandles[0];
- *irq_pcie_phandle = xplic_phandles[0];
- } else {
for (socket = 0; socket < socket_count; socket++) {
if (socket == 0) {
*irq_mmio_phandle = xplic_phandles[socket];
@@ -824,21 +842,24 @@ static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap,
riscv_socket_fdt_write_distance_matrix(ms);
}
-static void create_fdt_virtio(RISCVVirtState *s, const MemMapEntry *memmap,
- uint32_t irq_virtio_phandle)
+static void create_fdt_virtio(RISCVVirtState *s, uint32_t irq_virtio_phandle)
{
int i;
MachineState *ms = MACHINE(s);
+ hwaddr virtio_base = s->memmap[VIRT_VIRTIO].base;
for (i = 0; i < VIRTIO_COUNT; i++) {
- g_autofree char *name = g_strdup_printf("/soc/virtio_mmio@%lx",
- (long)(memmap[VIRT_VIRTIO].base + i * memmap[VIRT_VIRTIO].size));
+ g_autofree char *name = NULL;
+ uint64_t size = s->memmap[VIRT_VIRTIO].size;
+ hwaddr addr = virtio_base + i * size;
+
+ name = g_strdup_printf("/soc/virtio_mmio@%"HWADDR_PRIx, addr);
qemu_fdt_add_subnode(ms->fdt, name);
qemu_fdt_setprop_string(ms->fdt, name, "compatible", "virtio,mmio");
qemu_fdt_setprop_cells(ms->fdt, name, "reg",
- 0x0, memmap[VIRT_VIRTIO].base + i * memmap[VIRT_VIRTIO].size,
- 0x0, memmap[VIRT_VIRTIO].size);
+ 0x0, addr,
+ 0x0, size);
qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent",
irq_virtio_phandle);
if (s->aia_type == VIRT_AIA_TYPE_NONE) {
@@ -851,15 +872,16 @@ static void create_fdt_virtio(RISCVVirtState *s, const MemMapEntry *memmap,
}
}
-static void create_fdt_pcie(RISCVVirtState *s, const MemMapEntry *memmap,
+static void create_fdt_pcie(RISCVVirtState *s,
uint32_t irq_pcie_phandle,
- uint32_t msi_pcie_phandle)
+ uint32_t msi_pcie_phandle,
+ uint32_t iommu_sys_phandle)
{
g_autofree char *name = NULL;
MachineState *ms = MACHINE(s);
- name = g_strdup_printf("/soc/pci@%lx",
- (long) memmap[VIRT_PCIE_ECAM].base);
+ name = g_strdup_printf("/soc/pci@%"HWADDR_PRIx,
+ s->memmap[VIRT_PCIE_ECAM].base);
qemu_fdt_setprop_cell(ms->fdt, name, "#address-cells",
FDT_PCI_ADDR_CELLS);
qemu_fdt_setprop_cell(ms->fdt, name, "#interrupt-cells",
@@ -870,36 +892,41 @@ static void create_fdt_pcie(RISCVVirtState *s, const MemMapEntry *memmap,
qemu_fdt_setprop_string(ms->fdt, name, "device_type", "pci");
qemu_fdt_setprop_cell(ms->fdt, name, "linux,pci-domain", 0);
qemu_fdt_setprop_cells(ms->fdt, name, "bus-range", 0,
- memmap[VIRT_PCIE_ECAM].size / PCIE_MMCFG_SIZE_MIN - 1);
+ s->memmap[VIRT_PCIE_ECAM].size / PCIE_MMCFG_SIZE_MIN - 1);
qemu_fdt_setprop(ms->fdt, name, "dma-coherent", NULL, 0);
if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) {
qemu_fdt_setprop_cell(ms->fdt, name, "msi-parent", msi_pcie_phandle);
}
qemu_fdt_setprop_cells(ms->fdt, name, "reg", 0,
- memmap[VIRT_PCIE_ECAM].base, 0, memmap[VIRT_PCIE_ECAM].size);
+ s->memmap[VIRT_PCIE_ECAM].base, 0, s->memmap[VIRT_PCIE_ECAM].size);
qemu_fdt_setprop_sized_cells(ms->fdt, name, "ranges",
1, FDT_PCI_RANGE_IOPORT, 2, 0,
- 2, memmap[VIRT_PCIE_PIO].base, 2, memmap[VIRT_PCIE_PIO].size,
+ 2, s->memmap[VIRT_PCIE_PIO].base, 2, s->memmap[VIRT_PCIE_PIO].size,
1, FDT_PCI_RANGE_MMIO,
- 2, memmap[VIRT_PCIE_MMIO].base,
- 2, memmap[VIRT_PCIE_MMIO].base, 2, memmap[VIRT_PCIE_MMIO].size,
+ 2, s->memmap[VIRT_PCIE_MMIO].base,
+ 2, s->memmap[VIRT_PCIE_MMIO].base, 2, s->memmap[VIRT_PCIE_MMIO].size,
1, FDT_PCI_RANGE_MMIO_64BIT,
2, virt_high_pcie_memmap.base,
2, virt_high_pcie_memmap.base, 2, virt_high_pcie_memmap.size);
+ if (virt_is_iommu_sys_enabled(s)) {
+ qemu_fdt_setprop_cells(ms->fdt, name, "iommu-map",
+ 0, iommu_sys_phandle, 0, 0, 0,
+ iommu_sys_phandle, 0, 0xffff);
+ }
+
create_pcie_irq_map(s, ms->fdt, name, irq_pcie_phandle);
}
-static void create_fdt_reset(RISCVVirtState *s, const MemMapEntry *memmap,
- uint32_t *phandle)
+static void create_fdt_reset(RISCVVirtState *s, uint32_t *phandle)
{
char *name;
uint32_t test_phandle;
MachineState *ms = MACHINE(s);
test_phandle = (*phandle)++;
- name = g_strdup_printf("/soc/test@%lx",
- (long)memmap[VIRT_TEST].base);
+ name = g_strdup_printf("/soc/test@%"HWADDR_PRIx,
+ s->memmap[VIRT_TEST].base);
qemu_fdt_add_subnode(ms->fdt, name);
{
static const char * const compat[3] = {
@@ -909,7 +936,7 @@ static void create_fdt_reset(RISCVVirtState *s, const MemMapEntry *memmap,
(char **)&compat, ARRAY_SIZE(compat));
}
qemu_fdt_setprop_cells(ms->fdt, name, "reg",
- 0x0, memmap[VIRT_TEST].base, 0x0, memmap[VIRT_TEST].size);
+ 0x0, s->memmap[VIRT_TEST].base, 0x0, s->memmap[VIRT_TEST].size);
qemu_fdt_setprop_cell(ms->fdt, name, "phandle", test_phandle);
test_phandle = qemu_fdt_get_phandle(ms->fdt, name);
g_free(name);
@@ -931,18 +958,19 @@ static void create_fdt_reset(RISCVVirtState *s, const MemMapEntry *memmap,
g_free(name);
}
-static void create_fdt_uart(RISCVVirtState *s, const MemMapEntry *memmap,
+static void create_fdt_uart(RISCVVirtState *s,
uint32_t irq_mmio_phandle)
{
g_autofree char *name = NULL;
MachineState *ms = MACHINE(s);
- name = g_strdup_printf("/soc/serial@%lx", (long)memmap[VIRT_UART0].base);
+ name = g_strdup_printf("/soc/serial@%"HWADDR_PRIx,
+ s->memmap[VIRT_UART0].base);
qemu_fdt_add_subnode(ms->fdt, name);
qemu_fdt_setprop_string(ms->fdt, name, "compatible", "ns16550a");
qemu_fdt_setprop_cells(ms->fdt, name, "reg",
- 0x0, memmap[VIRT_UART0].base,
- 0x0, memmap[VIRT_UART0].size);
+ 0x0, s->memmap[VIRT_UART0].base,
+ 0x0, s->memmap[VIRT_UART0].size);
qemu_fdt_setprop_cell(ms->fdt, name, "clock-frequency", 3686400);
qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent", irq_mmio_phandle);
if (s->aia_type == VIRT_AIA_TYPE_NONE) {
@@ -952,20 +980,22 @@ static void create_fdt_uart(RISCVVirtState *s, const MemMapEntry *memmap,
}
qemu_fdt_setprop_string(ms->fdt, "/chosen", "stdout-path", name);
+ qemu_fdt_setprop_string(ms->fdt, "/aliases", "serial0", name);
}
-static void create_fdt_rtc(RISCVVirtState *s, const MemMapEntry *memmap,
+static void create_fdt_rtc(RISCVVirtState *s,
uint32_t irq_mmio_phandle)
{
g_autofree char *name = NULL;
MachineState *ms = MACHINE(s);
- name = g_strdup_printf("/soc/rtc@%lx", (long)memmap[VIRT_RTC].base);
+ name = g_strdup_printf("/soc/rtc@%"HWADDR_PRIx,
+ s->memmap[VIRT_RTC].base);
qemu_fdt_add_subnode(ms->fdt, name);
qemu_fdt_setprop_string(ms->fdt, name, "compatible",
"google,goldfish-rtc");
qemu_fdt_setprop_cells(ms->fdt, name, "reg",
- 0x0, memmap[VIRT_RTC].base, 0x0, memmap[VIRT_RTC].size);
+ 0x0, s->memmap[VIRT_RTC].base, 0x0, s->memmap[VIRT_RTC].size);
qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent",
irq_mmio_phandle);
if (s->aia_type == VIRT_AIA_TYPE_NONE) {
@@ -975,11 +1005,11 @@ static void create_fdt_rtc(RISCVVirtState *s, const MemMapEntry *memmap,
}
}
-static void create_fdt_flash(RISCVVirtState *s, const MemMapEntry *memmap)
+static void create_fdt_flash(RISCVVirtState *s)
{
MachineState *ms = MACHINE(s);
- hwaddr flashsize = virt_memmap[VIRT_FLASH].size / 2;
- hwaddr flashbase = virt_memmap[VIRT_FLASH].base;
+ hwaddr flashsize = s->memmap[VIRT_FLASH].size / 2;
+ hwaddr flashbase = s->memmap[VIRT_FLASH].base;
g_autofree char *name = g_strdup_printf("/flash@%" PRIx64, flashbase);
qemu_fdt_add_subnode(ms->fdt, name);
@@ -990,11 +1020,11 @@ static void create_fdt_flash(RISCVVirtState *s, const MemMapEntry *memmap)
qemu_fdt_setprop_cell(ms->fdt, name, "bank-width", 4);
}
-static void create_fdt_fw_cfg(RISCVVirtState *s, const MemMapEntry *memmap)
+static void create_fdt_fw_cfg(RISCVVirtState *s)
{
MachineState *ms = MACHINE(s);
- hwaddr base = memmap[VIRT_FW_CFG].base;
- hwaddr size = memmap[VIRT_FW_CFG].size;
+ hwaddr base = s->memmap[VIRT_FW_CFG].base;
+ hwaddr size = s->memmap[VIRT_FW_CFG].size;
g_autofree char *nodename = g_strdup_printf("/fw-cfg@%" PRIx64, base);
qemu_fdt_add_subnode(ms->fdt, nodename);
@@ -1013,8 +1043,8 @@ static void create_fdt_virtio_iommu(RISCVVirtState *s, uint16_t bdf)
g_autofree char *iommu_node = NULL;
g_autofree char *pci_node = NULL;
- pci_node = g_strdup_printf("/soc/pci@%lx",
- (long) virt_memmap[VIRT_PCIE_ECAM].base);
+ pci_node = g_strdup_printf("/soc/pci@%"HWADDR_PRIx,
+ s->memmap[VIRT_PCIE_ECAM].base);
iommu_node = g_strdup_printf("%s/virtio_iommu@%x,%x", pci_node,
PCI_SLOT(bdf), PCI_FUNC(bdf));
iommu_phandle = qemu_fdt_alloc_phandle(fdt);
@@ -1033,27 +1063,99 @@ static void create_fdt_virtio_iommu(RISCVVirtState *s, uint16_t bdf)
bdf + 1, iommu_phandle, bdf + 1, 0xffff - bdf);
}
+static void create_fdt_iommu_sys(RISCVVirtState *s, uint32_t irq_chip,
+ uint32_t msi_phandle,
+ uint32_t *iommu_sys_phandle)
+{
+ const char comp[] = "riscv,iommu";
+ void *fdt = MACHINE(s)->fdt;
+ uint32_t iommu_phandle;
+ g_autofree char *iommu_node = NULL;
+ hwaddr addr = s->memmap[VIRT_IOMMU_SYS].base;
+ hwaddr size = s->memmap[VIRT_IOMMU_SYS].size;
+ uint32_t iommu_irq_map[RISCV_IOMMU_INTR_COUNT] = {
+ IOMMU_SYS_IRQ + RISCV_IOMMU_INTR_CQ,
+ IOMMU_SYS_IRQ + RISCV_IOMMU_INTR_FQ,
+ IOMMU_SYS_IRQ + RISCV_IOMMU_INTR_PM,
+ IOMMU_SYS_IRQ + RISCV_IOMMU_INTR_PQ,
+ };
+
+ iommu_node = g_strdup_printf("/soc/iommu@%x",
+ (unsigned int) s->memmap[VIRT_IOMMU_SYS].base);
+ iommu_phandle = qemu_fdt_alloc_phandle(fdt);
+ qemu_fdt_add_subnode(fdt, iommu_node);
+
+ qemu_fdt_setprop(fdt, iommu_node, "compatible", comp, sizeof(comp));
+ qemu_fdt_setprop_cell(fdt, iommu_node, "#iommu-cells", 1);
+ qemu_fdt_setprop_cell(fdt, iommu_node, "phandle", iommu_phandle);
+
+ qemu_fdt_setprop_cells(fdt, iommu_node, "reg",
+ addr >> 32, addr, size >> 32, size);
+ qemu_fdt_setprop_cell(fdt, iommu_node, "interrupt-parent", irq_chip);
+
+ qemu_fdt_setprop_cells(fdt, iommu_node, "interrupts",
+ iommu_irq_map[0], FDT_IRQ_TYPE_EDGE_LOW,
+ iommu_irq_map[1], FDT_IRQ_TYPE_EDGE_LOW,
+ iommu_irq_map[2], FDT_IRQ_TYPE_EDGE_LOW,
+ iommu_irq_map[3], FDT_IRQ_TYPE_EDGE_LOW);
+
+ qemu_fdt_setprop_cell(fdt, iommu_node, "msi-parent", msi_phandle);
+
+ *iommu_sys_phandle = iommu_phandle;
+}
+
+static void create_fdt_iommu(RISCVVirtState *s, uint16_t bdf)
+{
+ const char comp[] = "riscv,pci-iommu";
+ void *fdt = MACHINE(s)->fdt;
+ uint32_t iommu_phandle;
+ g_autofree char *iommu_node = NULL;
+ g_autofree char *pci_node = NULL;
+
+ pci_node = g_strdup_printf("/soc/pci@%"HWADDR_PRIx,
+ s->memmap[VIRT_PCIE_ECAM].base);
+ iommu_node = g_strdup_printf("%s/iommu@%x", pci_node, bdf);
+ iommu_phandle = qemu_fdt_alloc_phandle(fdt);
+ qemu_fdt_add_subnode(fdt, iommu_node);
+
+ qemu_fdt_setprop(fdt, iommu_node, "compatible", comp, sizeof(comp));
+ qemu_fdt_setprop_cell(fdt, iommu_node, "#iommu-cells", 1);
+ qemu_fdt_setprop_cell(fdt, iommu_node, "phandle", iommu_phandle);
+ qemu_fdt_setprop_cells(fdt, iommu_node, "reg",
+ bdf << 8, 0, 0, 0, 0);
+ qemu_fdt_setprop_cells(fdt, pci_node, "iommu-map",
+ 0, iommu_phandle, 0, bdf,
+ bdf + 1, iommu_phandle, bdf + 1, 0xffff - bdf);
+ s->pci_iommu_bdf = bdf;
+}
+
static void finalize_fdt(RISCVVirtState *s)
{
uint32_t phandle = 1, irq_mmio_phandle = 1, msi_pcie_phandle = 1;
uint32_t irq_pcie_phandle = 1, irq_virtio_phandle = 1;
+ uint32_t iommu_sys_phandle = 1;
- create_fdt_sockets(s, virt_memmap, &phandle, &irq_mmio_phandle,
+ create_fdt_sockets(s, &phandle, &irq_mmio_phandle,
&irq_pcie_phandle, &irq_virtio_phandle,
&msi_pcie_phandle);
- create_fdt_virtio(s, virt_memmap, irq_virtio_phandle);
+ create_fdt_virtio(s, irq_virtio_phandle);
- create_fdt_pcie(s, virt_memmap, irq_pcie_phandle, msi_pcie_phandle);
+ if (virt_is_iommu_sys_enabled(s)) {
+ create_fdt_iommu_sys(s, irq_mmio_phandle, msi_pcie_phandle,
+ &iommu_sys_phandle);
+ }
+ create_fdt_pcie(s, irq_pcie_phandle, msi_pcie_phandle,
+ iommu_sys_phandle);
- create_fdt_reset(s, virt_memmap, &phandle);
+ create_fdt_reset(s, &phandle);
- create_fdt_uart(s, virt_memmap, irq_mmio_phandle);
+ create_fdt_uart(s, irq_mmio_phandle);
- create_fdt_rtc(s, virt_memmap, irq_mmio_phandle);
+ create_fdt_rtc(s, irq_mmio_phandle);
}
-static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap)
+static void create_fdt(RISCVVirtState *s)
{
MachineState *ms = MACHINE(s);
uint8_t rng_seed[32];
@@ -1080,7 +1182,8 @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap)
* The "/soc/pci@..." node is needed for PCIE hotplugs
* that might happen before finalize_fdt().
*/
- name = g_strdup_printf("/soc/pci@%lx", (long) memmap[VIRT_PCIE_ECAM].base);
+ name = g_strdup_printf("/soc/pci@%"HWADDR_PRIx,
+ s->memmap[VIRT_PCIE_ECAM].base);
qemu_fdt_add_subnode(ms->fdt, name);
qemu_fdt_add_subnode(ms->fdt, "/chosen");
@@ -1090,8 +1193,10 @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap)
qemu_fdt_setprop(ms->fdt, "/chosen", "rng-seed",
rng_seed, sizeof(rng_seed));
- create_fdt_flash(s, memmap);
- create_fdt_fw_cfg(s, memmap);
+ qemu_fdt_add_subnode(ms->fdt, "/aliases");
+
+ create_fdt_flash(s);
+ create_fdt_fw_cfg(s);
create_fdt_pmu(s);
}
@@ -1116,23 +1221,21 @@ static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem,
dev = qdev_new(TYPE_GPEX_HOST);
/* Set GPEX object properties for the virt machine */
- object_property_set_uint(OBJECT(GPEX_HOST(dev)), PCI_HOST_ECAM_BASE,
+ object_property_set_uint(OBJECT(dev), PCI_HOST_ECAM_BASE,
ecam_base, NULL);
- object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_ECAM_SIZE,
+ object_property_set_int(OBJECT(dev), PCI_HOST_ECAM_SIZE,
ecam_size, NULL);
- object_property_set_uint(OBJECT(GPEX_HOST(dev)),
- PCI_HOST_BELOW_4G_MMIO_BASE,
+ object_property_set_uint(OBJECT(dev), PCI_HOST_BELOW_4G_MMIO_BASE,
mmio_base, NULL);
- object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_BELOW_4G_MMIO_SIZE,
+ object_property_set_int(OBJECT(dev), PCI_HOST_BELOW_4G_MMIO_SIZE,
mmio_size, NULL);
- object_property_set_uint(OBJECT(GPEX_HOST(dev)),
- PCI_HOST_ABOVE_4G_MMIO_BASE,
+ object_property_set_uint(OBJECT(dev), PCI_HOST_ABOVE_4G_MMIO_BASE,
high_mmio_base, NULL);
- object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_ABOVE_4G_MMIO_SIZE,
+ object_property_set_int(OBJECT(dev), PCI_HOST_ABOVE_4G_MMIO_SIZE,
high_mmio_size, NULL);
- object_property_set_uint(OBJECT(GPEX_HOST(dev)), PCI_HOST_PIO_BASE,
+ object_property_set_uint(OBJECT(dev), PCI_HOST_PIO_BASE,
pio_base, NULL);
- object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_PIO_SIZE,
+ object_property_set_int(OBJECT(dev), PCI_HOST_PIO_SIZE,
pio_size, NULL);
sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
@@ -1158,20 +1261,19 @@ static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem,
sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, pio_base);
- for (i = 0; i < GPEX_NUM_IRQS; i++) {
+ for (i = 0; i < PCI_NUM_PINS; i++) {
irq = qdev_get_gpio_in(irqchip, PCIE_IRQ + i);
sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, irq);
gpex_set_irq_num(GPEX_HOST(dev), i, PCIE_IRQ + i);
}
- GPEX_HOST(dev)->gpex_cfg.bus = PCI_HOST_BRIDGE(GPEX_HOST(dev))->bus;
+ GPEX_HOST(dev)->gpex_cfg.bus = PCI_HOST_BRIDGE(dev)->bus;
return dev;
}
-static FWCfgState *create_fw_cfg(const MachineState *ms)
+static FWCfgState *create_fw_cfg(const MachineState *ms, hwaddr base)
{
- hwaddr base = virt_memmap[VIRT_FW_CFG].base;
FWCfgState *fw_cfg;
fw_cfg = fw_cfg_init_mem_wide(base + 8, base, 8, base + 16,
@@ -1184,27 +1286,22 @@ static FWCfgState *create_fw_cfg(const MachineState *ms)
static DeviceState *virt_create_plic(const MemMapEntry *memmap, int socket,
int base_hartid, int hart_count)
{
- DeviceState *ret;
g_autofree char *plic_hart_config = NULL;
/* Per-socket PLIC hart topology configuration string */
plic_hart_config = riscv_plic_hart_config_string(hart_count);
/* Per-socket PLIC */
- ret = sifive_plic_create(
- memmap[VIRT_PLIC].base + socket * memmap[VIRT_PLIC].size,
- plic_hart_config, hart_count, base_hartid,
- VIRT_IRQCHIP_NUM_SOURCES,
- ((1U << VIRT_IRQCHIP_NUM_PRIO_BITS) - 1),
- VIRT_PLIC_PRIORITY_BASE,
- VIRT_PLIC_PENDING_BASE,
- VIRT_PLIC_ENABLE_BASE,
- VIRT_PLIC_ENABLE_STRIDE,
- VIRT_PLIC_CONTEXT_BASE,
- VIRT_PLIC_CONTEXT_STRIDE,
- memmap[VIRT_PLIC].size);
-
- return ret;
+ return sifive_plic_create(
+ memmap[VIRT_PLIC].base + socket * memmap[VIRT_PLIC].size,
+ plic_hart_config, hart_count, base_hartid,
+ VIRT_IRQCHIP_NUM_SOURCES,
+ ((1U << VIRT_IRQCHIP_NUM_PRIO_BITS) - 1),
+ VIRT_PLIC_PRIORITY_BASE, VIRT_PLIC_PENDING_BASE,
+ VIRT_PLIC_ENABLE_BASE, VIRT_PLIC_ENABLE_STRIDE,
+ VIRT_PLIC_CONTEXT_BASE,
+ VIRT_PLIC_CONTEXT_STRIDE,
+ memmap[VIRT_PLIC].size);
}
static DeviceState *virt_create_aia(RISCVVirtAIAType aia_type, int aia_guests,
@@ -1212,7 +1309,7 @@ static DeviceState *virt_create_aia(RISCVVirtAIAType aia_type, int aia_guests,
int base_hartid, int hart_count)
{
int i;
- hwaddr addr;
+ hwaddr addr = 0;
uint32_t guest_bits;
DeviceState *aplic_s = NULL;
DeviceState *aplic_m = NULL;
@@ -1262,6 +1359,10 @@ static DeviceState *virt_create_aia(RISCVVirtAIAType aia_type, int aia_guests,
VIRT_IRQCHIP_NUM_PRIO_BITS,
msimode, false, aplic_m);
+ if (kvm_enabled() && msimode) {
+ riscv_aplic_set_kvm_msicfgaddr(RISCV_APLIC(aplic_s), addr);
+ }
+
return kvm_enabled() ? aplic_s : aplic_m;
}
@@ -1269,14 +1370,13 @@ static void create_platform_bus(RISCVVirtState *s, DeviceState *irqchip)
{
DeviceState *dev;
SysBusDevice *sysbus;
- const MemMapEntry *memmap = virt_memmap;
int i;
MemoryRegion *sysmem = get_system_memory();
dev = qdev_new(TYPE_PLATFORM_BUS_DEVICE);
dev->id = g_strdup(TYPE_PLATFORM_BUS_DEVICE);
qdev_prop_set_uint32(dev, "num_irqs", VIRT_PLATFORM_BUS_NUM_IRQS);
- qdev_prop_set_uint32(dev, "mmio_size", memmap[VIRT_PLATFORM_BUS].size);
+ qdev_prop_set_uint32(dev, "mmio_size", s->memmap[VIRT_PLATFORM_BUS].size);
sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
s->platform_bus_dev = dev;
@@ -1287,7 +1387,7 @@ static void create_platform_bus(RISCVVirtState *s, DeviceState *irqchip)
}
memory_region_add_subregion(sysmem,
- memmap[VIRT_PLATFORM_BUS].base,
+ s->memmap[VIRT_PLATFORM_BUS].base,
sysbus_mmio_get_region(sysbus, 0));
}
@@ -1334,14 +1434,14 @@ static void virt_machine_done(Notifier *notifier, void *data)
{
RISCVVirtState *s = container_of(notifier, RISCVVirtState,
machine_done);
- const MemMapEntry *memmap = virt_memmap;
MachineState *machine = MACHINE(s);
- target_ulong start_addr = memmap[VIRT_DRAM].base;
+ hwaddr start_addr = s->memmap[VIRT_DRAM].base;
target_ulong firmware_end_addr, kernel_start_addr;
const char *firmware_name = riscv_default_firmware_name(&s->soc[0]);
uint64_t fdt_load_addr;
uint64_t kernel_entry = 0;
BlockBackend *pflash_blk0;
+ RISCVBootInfo boot_info;
/*
* An user provided dtb must include everything, including
@@ -1368,7 +1468,7 @@ static void virt_machine_done(Notifier *notifier, void *data)
}
firmware_end_addr = riscv_find_and_load_firmware(machine, firmware_name,
- start_addr, NULL);
+ &start_addr, NULL);
pflash_blk0 = pflash_cfi01_get_blk(s->flash[0]);
if (pflash_blk0) {
@@ -1379,34 +1479,36 @@ static void virt_machine_done(Notifier *notifier, void *data)
* let's overwrite the address we jump to after reset to
* the base of the flash.
*/
- start_addr = virt_memmap[VIRT_FLASH].base;
+ start_addr = s->memmap[VIRT_FLASH].base;
} else {
/*
* Pflash was supplied but either KVM guest or bios is not none.
* In this case, base of the flash would contain S-mode payload.
*/
riscv_setup_firmware_boot(machine);
- kernel_entry = virt_memmap[VIRT_FLASH].base;
+ kernel_entry = s->memmap[VIRT_FLASH].base;
}
}
+ riscv_boot_info_init(&boot_info, &s->soc[0]);
+
if (machine->kernel_filename && !kernel_entry) {
- kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0],
+ kernel_start_addr = riscv_calc_kernel_start_addr(&boot_info,
firmware_end_addr);
-
- kernel_entry = riscv_load_kernel(machine, &s->soc[0],
- kernel_start_addr, true, NULL);
+ riscv_load_kernel(machine, &boot_info, kernel_start_addr,
+ true, NULL);
+ kernel_entry = boot_info.image_low_addr;
}
- fdt_load_addr = riscv_compute_fdt_addr(memmap[VIRT_DRAM].base,
- memmap[VIRT_DRAM].size,
- machine);
+ fdt_load_addr = riscv_compute_fdt_addr(s->memmap[VIRT_DRAM].base,
+ s->memmap[VIRT_DRAM].size,
+ machine, &boot_info);
riscv_load_fdt(fdt_load_addr, machine->fdt);
/* load the reset vector */
riscv_setup_rom_reset_vec(machine, &s->soc[0], start_addr,
- virt_memmap[VIRT_MROM].base,
- virt_memmap[VIRT_MROM].size, kernel_entry,
+ s->memmap[VIRT_MROM].base,
+ s->memmap[VIRT_MROM].size, kernel_entry,
fdt_load_addr);
/*
@@ -1427,7 +1529,6 @@ static void virt_machine_done(Notifier *notifier, void *data)
static void virt_machine_init(MachineState *machine)
{
- const MemMapEntry *memmap = virt_memmap;
RISCVVirtState *s = RISCV_VIRT_MACHINE(machine);
MemoryRegion *system_memory = get_system_memory();
MemoryRegion *mask_rom = g_new(MemoryRegion, 1);
@@ -1435,6 +1536,8 @@ static void virt_machine_init(MachineState *machine)
int i, base_hartid, hart_count;
int socket_count = riscv_socket_count(machine);
+ s->memmap = virt_memmap;
+
/* Check socket count limit */
if (VIRT_SOCKETS_MAX < socket_count) {
error_report("number of sockets/nodes should be less than %d",
@@ -1482,7 +1585,7 @@ static void virt_machine_init(MachineState *machine)
if (virt_aclint_allowed() && s->have_aclint) {
if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) {
/* Per-socket ACLINT MTIMER */
- riscv_aclint_mtimer_create(memmap[VIRT_CLINT].base +
+ riscv_aclint_mtimer_create(s->memmap[VIRT_CLINT].base +
i * RISCV_ACLINT_DEFAULT_MTIMER_SIZE,
RISCV_ACLINT_DEFAULT_MTIMER_SIZE,
base_hartid, hart_count,
@@ -1491,28 +1594,28 @@ static void virt_machine_init(MachineState *machine)
RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true);
} else {
/* Per-socket ACLINT MSWI, MTIMER, and SSWI */
- riscv_aclint_swi_create(memmap[VIRT_CLINT].base +
- i * memmap[VIRT_CLINT].size,
+ riscv_aclint_swi_create(s->memmap[VIRT_CLINT].base +
+ i * s->memmap[VIRT_CLINT].size,
base_hartid, hart_count, false);
- riscv_aclint_mtimer_create(memmap[VIRT_CLINT].base +
- i * memmap[VIRT_CLINT].size +
+ riscv_aclint_mtimer_create(s->memmap[VIRT_CLINT].base +
+ i * s->memmap[VIRT_CLINT].size +
RISCV_ACLINT_SWI_SIZE,
RISCV_ACLINT_DEFAULT_MTIMER_SIZE,
base_hartid, hart_count,
RISCV_ACLINT_DEFAULT_MTIMECMP,
RISCV_ACLINT_DEFAULT_MTIME,
RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true);
- riscv_aclint_swi_create(memmap[VIRT_ACLINT_SSWI].base +
- i * memmap[VIRT_ACLINT_SSWI].size,
+ riscv_aclint_swi_create(s->memmap[VIRT_ACLINT_SSWI].base +
+ i * s->memmap[VIRT_ACLINT_SSWI].size,
base_hartid, hart_count, true);
}
} else if (tcg_enabled()) {
/* Per-socket SiFive CLINT */
riscv_aclint_swi_create(
- memmap[VIRT_CLINT].base + i * memmap[VIRT_CLINT].size,
+ s->memmap[VIRT_CLINT].base + i * s->memmap[VIRT_CLINT].size,
base_hartid, hart_count, false);
- riscv_aclint_mtimer_create(memmap[VIRT_CLINT].base +
- i * memmap[VIRT_CLINT].size + RISCV_ACLINT_SWI_SIZE,
+ riscv_aclint_mtimer_create(s->memmap[VIRT_CLINT].base +
+ i * s->memmap[VIRT_CLINT].size + RISCV_ACLINT_SWI_SIZE,
RISCV_ACLINT_DEFAULT_MTIMER_SIZE, base_hartid, hart_count,
RISCV_ACLINT_DEFAULT_MTIMECMP, RISCV_ACLINT_DEFAULT_MTIME,
RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true);
@@ -1520,11 +1623,11 @@ static void virt_machine_init(MachineState *machine)
/* Per-socket interrupt controller */
if (s->aia_type == VIRT_AIA_TYPE_NONE) {
- s->irqchip[i] = virt_create_plic(memmap, i,
+ s->irqchip[i] = virt_create_plic(s->memmap, i,
base_hartid, hart_count);
} else {
s->irqchip[i] = virt_create_aia(s->aia_type, s->aia_guests,
- memmap, i, base_hartid,
+ s->memmap, i, base_hartid,
hart_count);
}
@@ -1543,11 +1646,11 @@ static void virt_machine_init(MachineState *machine)
}
}
- if (kvm_enabled() && virt_use_kvm_aia(s)) {
+ if (kvm_enabled() && virt_use_kvm_aia_aplic_imsic(s->aia_type)) {
kvm_riscv_aia_create(machine, IMSIC_MMIO_GROUP_MIN_SHIFT,
VIRT_IRQCHIP_NUM_SOURCES, VIRT_IRQCHIP_NUM_MSIS,
- memmap[VIRT_APLIC_S].base,
- memmap[VIRT_IMSIC_S].base,
+ s->memmap[VIRT_APLIC_S].base,
+ s->memmap[VIRT_IMSIC_S].base,
s->aia_guests);
}
@@ -1563,37 +1666,36 @@ static void virt_machine_init(MachineState *machine)
virt_high_pcie_memmap.size = VIRT32_HIGH_PCIE_MMIO_SIZE;
} else {
virt_high_pcie_memmap.size = VIRT64_HIGH_PCIE_MMIO_SIZE;
- virt_high_pcie_memmap.base = memmap[VIRT_DRAM].base + machine->ram_size;
+ virt_high_pcie_memmap.base = s->memmap[VIRT_DRAM].base +
+ machine->ram_size;
virt_high_pcie_memmap.base =
ROUND_UP(virt_high_pcie_memmap.base, virt_high_pcie_memmap.size);
}
- s->memmap = virt_memmap;
-
/* register system main memory (actual RAM) */
- memory_region_add_subregion(system_memory, memmap[VIRT_DRAM].base,
- machine->ram);
+ memory_region_add_subregion(system_memory, s->memmap[VIRT_DRAM].base,
+ machine->ram);
/* boot rom */
memory_region_init_rom(mask_rom, NULL, "riscv_virt_board.mrom",
- memmap[VIRT_MROM].size, &error_fatal);
- memory_region_add_subregion(system_memory, memmap[VIRT_MROM].base,
+ s->memmap[VIRT_MROM].size, &error_fatal);
+ memory_region_add_subregion(system_memory, s->memmap[VIRT_MROM].base,
mask_rom);
/*
* Init fw_cfg. Must be done before riscv_load_fdt, otherwise the
* device tree cannot be altered and we get FDT_ERR_NOSPACE.
*/
- s->fw_cfg = create_fw_cfg(machine);
+ s->fw_cfg = create_fw_cfg(machine, s->memmap[VIRT_FW_CFG].base);
rom_set_fw(s->fw_cfg);
/* SiFive Test MMIO device */
- sifive_test_create(memmap[VIRT_TEST].base);
+ sifive_test_create(s->memmap[VIRT_TEST].base);
/* VirtIO MMIO devices */
for (i = 0; i < VIRTIO_COUNT; i++) {
sysbus_create_simple("virtio-mmio",
- memmap[VIRT_VIRTIO].base + i * memmap[VIRT_VIRTIO].size,
+ s->memmap[VIRT_VIRTIO].base + i * s->memmap[VIRT_VIRTIO].size,
qdev_get_gpio_in(virtio_irqchip, VIRTIO_IRQ + i));
}
@@ -1601,11 +1703,11 @@ static void virt_machine_init(MachineState *machine)
create_platform_bus(s, mmio_irqchip);
- serial_mm_init(system_memory, memmap[VIRT_UART0].base,
+ serial_mm_init(system_memory, s->memmap[VIRT_UART0].base,
0, qdev_get_gpio_in(mmio_irqchip, UART0_IRQ), 399193,
serial_hd(0), DEVICE_LITTLE_ENDIAN);
- sysbus_create_simple("goldfish_rtc", memmap[VIRT_RTC].base,
+ sysbus_create_simple("goldfish_rtc", s->memmap[VIRT_RTC].base,
qdev_get_gpio_in(mmio_irqchip, RTC_IRQ));
for (i = 0; i < ARRAY_SIZE(s->flash); i++) {
@@ -1623,7 +1725,23 @@ static void virt_machine_init(MachineState *machine)
exit(1);
}
} else {
- create_fdt(s, memmap);
+ create_fdt(s);
+ }
+
+ if (virt_is_iommu_sys_enabled(s)) {
+ DeviceState *iommu_sys = qdev_new(TYPE_RISCV_IOMMU_SYS);
+
+ object_property_set_uint(OBJECT(iommu_sys), "addr",
+ s->memmap[VIRT_IOMMU_SYS].base,
+ &error_fatal);
+ object_property_set_uint(OBJECT(iommu_sys), "base-irq",
+ IOMMU_SYS_IRQ,
+ &error_fatal);
+ object_property_set_link(OBJECT(iommu_sys), "irqchip",
+ OBJECT(mmio_irqchip),
+ &error_fatal);
+
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(iommu_sys), &error_fatal);
}
s->machine_done.notify = virt_machine_done;
@@ -1639,6 +1757,7 @@ static void virt_machine_instance_init(Object *obj)
s->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6);
s->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
s->acpi = ON_OFF_AUTO_AUTO;
+ s->iommu_sys = ON_OFF_AUTO_AUTO;
}
static char *virt_get_aia_guests(Object *obj, Error **errp)
@@ -1711,6 +1830,28 @@ static void virt_set_aclint(Object *obj, bool value, Error **errp)
s->have_aclint = value;
}
+bool virt_is_iommu_sys_enabled(RISCVVirtState *s)
+{
+ return s->iommu_sys == ON_OFF_AUTO_ON;
+}
+
+static void virt_get_iommu_sys(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ RISCVVirtState *s = RISCV_VIRT_MACHINE(obj);
+ OnOffAuto iommu_sys = s->iommu_sys;
+
+ visit_type_OnOffAuto(v, name, &iommu_sys, errp);
+}
+
+static void virt_set_iommu_sys(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ RISCVVirtState *s = RISCV_VIRT_MACHINE(obj);
+
+ visit_type_OnOffAuto(v, name, &s->iommu_sys, errp);
+}
+
bool virt_is_acpi_enabled(RISCVVirtState *s)
{
return s->acpi != ON_OFF_AUTO_OFF;
@@ -1737,11 +1878,15 @@ static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine,
DeviceState *dev)
{
MachineClass *mc = MACHINE_GET_CLASS(machine);
+ RISCVVirtState *s = RISCV_VIRT_MACHINE(machine);
if (device_is_dynamic_sysbus(mc, dev) ||
- object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
+ object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI) ||
+ object_dynamic_cast(OBJECT(dev), TYPE_RISCV_IOMMU_PCI)) {
+ s->iommu_sys = ON_OFF_AUTO_OFF;
return HOTPLUG_HANDLER(machine);
}
+
return NULL;
}
@@ -1762,9 +1907,14 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
create_fdt_virtio_iommu(s, pci_get_bdf(PCI_DEVICE(dev)));
}
+
+ if (object_dynamic_cast(OBJECT(dev), TYPE_RISCV_IOMMU_PCI)) {
+ create_fdt_iommu(s, pci_get_bdf(PCI_DEVICE(dev)));
+ s->iommu_sys = ON_OFF_AUTO_OFF;
+ }
}
-static void virt_machine_class_init(ObjectClass *oc, void *data)
+static void virt_machine_class_init(ObjectClass *oc, const void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
@@ -1789,6 +1939,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
hc->plug = virt_machine_device_plug_cb;
machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE);
+ machine_class_allow_dynamic_sysbus_dev(mc, TYPE_UEFI_VARS_SYSBUS);
#ifdef CONFIG_TPM
machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS);
#endif
@@ -1823,6 +1974,12 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
NULL, NULL);
object_class_property_set_description(oc, "acpi",
"Enable ACPI");
+
+ object_class_property_add(oc, "iommu-sys", "OnOffAuto",
+ virt_get_iommu_sys, virt_set_iommu_sys,
+ NULL, NULL);
+ object_class_property_set_description(oc, "iommu-sys",
+ "Enable IOMMU platform device");
}
static const TypeInfo virt_machine_typeinfo = {
@@ -1831,7 +1988,7 @@ static const TypeInfo virt_machine_typeinfo = {
.class_init = virt_machine_class_init,
.instance_init = virt_machine_instance_init,
.instance_size = sizeof(RISCVVirtState),
- .interfaces = (InterfaceInfo[]) {
+ .interfaces = (const InterfaceInfo[]) {
{ TYPE_HOTPLUG_HANDLER },
{ }
},