diff options
Diffstat (limited to 'hw/riscv')
-rw-r--r-- | hw/riscv/Kconfig | 14 | ||||
-rw-r--r-- | hw/riscv/boot.c | 159 | ||||
-rw-r--r-- | hw/riscv/meson.build | 3 | ||||
-rw-r--r-- | hw/riscv/microblaze-v-generic.c | 189 | ||||
-rw-r--r-- | hw/riscv/microchip_pfsoc.c | 174 | ||||
-rw-r--r-- | hw/riscv/numa.c | 2 | ||||
-rw-r--r-- | hw/riscv/opentitan.c | 17 | ||||
-rw-r--r-- | hw/riscv/riscv-iommu-bits.h | 468 | ||||
-rw-r--r-- | hw/riscv/riscv-iommu-hpm.c | 381 | ||||
-rw-r--r-- | hw/riscv/riscv-iommu-hpm.h | 33 | ||||
-rw-r--r-- | hw/riscv/riscv-iommu-pci.c | 217 | ||||
-rw-r--r-- | hw/riscv/riscv-iommu-sys.c | 248 | ||||
-rw-r--r-- | hw/riscv/riscv-iommu.c | 2679 | ||||
-rw-r--r-- | hw/riscv/riscv-iommu.h | 157 | ||||
-rw-r--r-- | hw/riscv/riscv_hart.c | 105 | ||||
-rw-r--r-- | hw/riscv/shakti_c.c | 21 | ||||
-rw-r--r-- | hw/riscv/sifive_e.c | 11 | ||||
-rw-r--r-- | hw/riscv/sifive_u.c | 40 | ||||
-rw-r--r-- | hw/riscv/spike.c | 25 | ||||
-rw-r--r-- | hw/riscv/trace-events | 26 | ||||
-rw-r--r-- | hw/riscv/trace.h | 1 | ||||
-rw-r--r-- | hw/riscv/virt-acpi-build.c | 278 | ||||
-rw-r--r-- | hw/riscv/virt.c | 592 |
23 files changed, 5463 insertions, 377 deletions
diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig index a2030e3..e6a0ac1 100644 --- a/hw/riscv/Kconfig +++ b/hw/riscv/Kconfig @@ -1,3 +1,6 @@ +config RISCV_IOMMU + bool + config RISCV_NUMA bool @@ -22,6 +25,14 @@ config MICROCHIP_PFSOC select SIFIVE_PLIC select UNIMP +config MICROBLAZE_V + bool + default y + depends on RISCV32 || RISCV64 + select XILINX + select XILINX_AXI + select XILINX_ETHLITE + config OPENTITAN bool default y @@ -44,9 +55,10 @@ config RISCV_VIRT select PCI select PCI_EXPRESS_GENERIC_BRIDGE select PFLASH_CFI01 - select SERIAL + select SERIAL_MM select RISCV_ACLINT select RISCV_APLIC + select RISCV_IOMMU select RISCV_IMSIC select SIFIVE_PLIC select SIFIVE_TEST diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c index 47281ca..828a867 100644 --- a/hw/riscv/boot.c +++ b/hw/riscv/boot.c @@ -27,17 +27,17 @@ #include "hw/riscv/boot.h" #include "hw/riscv/boot_opensbi.h" #include "elf.h" -#include "sysemu/device_tree.h" -#include "sysemu/qtest.h" -#include "sysemu/kvm.h" -#include "sysemu/reset.h" +#include "system/device_tree.h" +#include "system/qtest.h" +#include "system/kvm.h" +#include "system/reset.h" #include <libfdt.h> bool riscv_is_32bit(RISCVHartArrayState *harts) { RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(&harts->harts[0]); - return mcc->misa_mxl_max == MXL_RV32; + return mcc->def->misa_mxl_max == MXL_RV32; } /* @@ -67,9 +67,16 @@ char *riscv_plic_hart_config_string(int hart_count) return g_strjoinv(",", (char **)vals); } -target_ulong riscv_calc_kernel_start_addr(RISCVHartArrayState *harts, +void riscv_boot_info_init(RISCVBootInfo *info, RISCVHartArrayState *harts) +{ + info->kernel_size = 0; + info->initrd_size = 0; + info->is_32bit = riscv_is_32bit(harts); +} + +target_ulong riscv_calc_kernel_start_addr(RISCVBootInfo *info, target_ulong firmware_end_addr) { - if (riscv_is_32bit(harts)) { + if (info->is_32bit) { return QEMU_ALIGN_UP(firmware_end_addr, 4 * MiB); } else { return QEMU_ALIGN_UP(firmware_end_addr, 2 * MiB); @@ -128,11 +135,11 @@ char *riscv_find_firmware(const char *firmware_filename, target_ulong riscv_find_and_load_firmware(MachineState *machine, const char *default_machine_firmware, - hwaddr firmware_load_addr, + hwaddr *firmware_load_addr, symbol_fn_t sym_cb) { char *firmware_filename; - target_ulong firmware_end_addr = firmware_load_addr; + target_ulong firmware_end_addr = *firmware_load_addr; firmware_filename = riscv_find_firmware(machine->firmware, default_machine_firmware); @@ -148,7 +155,7 @@ target_ulong riscv_find_and_load_firmware(MachineState *machine, } target_ulong riscv_load_firmware(const char *firmware_filename, - hwaddr firmware_load_addr, + hwaddr *firmware_load_addr, symbol_fn_t sym_cb) { uint64_t firmware_entry, firmware_end; @@ -159,22 +166,23 @@ target_ulong riscv_load_firmware(const char *firmware_filename, if (load_elf_ram_sym(firmware_filename, NULL, NULL, NULL, &firmware_entry, NULL, &firmware_end, NULL, 0, EM_RISCV, 1, 0, NULL, true, sym_cb) > 0) { + *firmware_load_addr = firmware_entry; return firmware_end; } firmware_size = load_image_targphys_as(firmware_filename, - firmware_load_addr, + *firmware_load_addr, current_machine->ram_size, NULL); if (firmware_size > 0) { - return firmware_load_addr + firmware_size; + return *firmware_load_addr + firmware_size; } error_report("could not load firmware '%s'", firmware_filename); exit(1); } -static void riscv_load_initrd(MachineState *machine, uint64_t kernel_entry) +static void riscv_load_initrd(MachineState *machine, RISCVBootInfo *info) { const char *filename = machine->initrd_filename; uint64_t mem_size = machine->ram_size; @@ -195,7 +203,7 @@ static void riscv_load_initrd(MachineState *machine, uint64_t kernel_entry) * halfway into RAM, and for boards with 1GB of RAM or more we put * the initrd at 512MB. */ - start = kernel_entry + MIN(mem_size / 2, 512 * MiB); + start = info->image_low_addr + MIN(mem_size / 2, 512 * MiB); size = load_ramdisk(filename, start, mem_size - start); if (size == -1) { @@ -206,6 +214,9 @@ static void riscv_load_initrd(MachineState *machine, uint64_t kernel_entry) } } + info->initrd_start = start; + info->initrd_size = size; + /* Some RISC-V machines (e.g. opentitan) don't have a fdt. */ if (fdt) { end = start + size; @@ -214,14 +225,14 @@ static void riscv_load_initrd(MachineState *machine, uint64_t kernel_entry) } } -target_ulong riscv_load_kernel(MachineState *machine, - RISCVHartArrayState *harts, - target_ulong kernel_start_addr, - bool load_initrd, - symbol_fn_t sym_cb) +void riscv_load_kernel(MachineState *machine, + RISCVBootInfo *info, + target_ulong kernel_start_addr, + bool load_initrd, + symbol_fn_t sym_cb) { const char *kernel_filename = machine->kernel_filename; - uint64_t kernel_load_base, kernel_entry; + ssize_t kernel_size; void *fdt = machine->fdt; g_assert(kernel_filename != NULL); @@ -233,21 +244,29 @@ target_ulong riscv_load_kernel(MachineState *machine, * the (expected) load address load address. This allows kernels to have * separate SBI and ELF entry points (used by FreeBSD, for example). */ - if (load_elf_ram_sym(kernel_filename, NULL, NULL, NULL, - NULL, &kernel_load_base, NULL, NULL, 0, - EM_RISCV, 1, 0, NULL, true, sym_cb) > 0) { - kernel_entry = kernel_load_base; + kernel_size = load_elf_ram_sym(kernel_filename, NULL, NULL, NULL, NULL, + &info->image_low_addr, &info->image_high_addr, + NULL, ELFDATA2LSB, EM_RISCV, + 1, 0, NULL, true, sym_cb); + if (kernel_size > 0) { + info->kernel_size = kernel_size; goto out; } - if (load_uimage_as(kernel_filename, &kernel_entry, NULL, NULL, - NULL, NULL, NULL) > 0) { + kernel_size = load_uimage_as(kernel_filename, &info->image_low_addr, + NULL, NULL, NULL, NULL, NULL); + if (kernel_size > 0) { + info->kernel_size = kernel_size; + info->image_high_addr = info->image_low_addr + kernel_size; goto out; } - if (load_image_targphys_as(kernel_filename, kernel_start_addr, - current_machine->ram_size, NULL) > 0) { - kernel_entry = kernel_start_addr; + kernel_size = load_image_targphys_as(kernel_filename, kernel_start_addr, + current_machine->ram_size, NULL); + if (kernel_size > 0) { + info->kernel_size = kernel_size; + info->image_low_addr = kernel_start_addr; + info->image_high_addr = info->image_low_addr + kernel_size; goto out; } @@ -256,23 +275,21 @@ target_ulong riscv_load_kernel(MachineState *machine, out: /* - * For 32 bit CPUs 'kernel_entry' can be sign-extended by + * For 32 bit CPUs 'image_low_addr' can be sign-extended by * load_elf_ram_sym(). */ - if (riscv_is_32bit(harts)) { - kernel_entry = extract64(kernel_entry, 0, 32); + if (info->is_32bit) { + info->image_low_addr = extract64(info->image_low_addr, 0, 32); } if (load_initrd && machine->initrd_filename) { - riscv_load_initrd(machine, kernel_entry); + riscv_load_initrd(machine, info); } if (fdt && machine->kernel_cmdline && *machine->kernel_cmdline) { qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", machine->kernel_cmdline); } - - return kernel_entry; } /* @@ -292,11 +309,12 @@ out: * The FDT is fdt_packed() during the calculation. */ uint64_t riscv_compute_fdt_addr(hwaddr dram_base, hwaddr dram_size, - MachineState *ms) + MachineState *ms, RISCVBootInfo *info) { int ret = fdt_pack(ms->fdt); hwaddr dram_end, temp; int fdtsize; + uint64_t dtb_start, dtb_start_limit; /* Should only fail if we've built a corrupted tree */ g_assert(ret == 0); @@ -307,6 +325,17 @@ uint64_t riscv_compute_fdt_addr(hwaddr dram_base, hwaddr dram_size, exit(1); } + if (info->initrd_size) { + /* If initrd is successfully loaded, place DTB after it. */ + dtb_start_limit = info->initrd_start + info->initrd_size; + } else if (info->kernel_size) { + /* If only kernel is successfully loaded, place DTB after it. */ + dtb_start_limit = info->image_high_addr; + } else { + /* Otherwise, do not check DTB overlapping */ + dtb_start_limit = 0; + } + /* * A dram_size == 0, usually from a MemMapEntry[].size element, * means that the DRAM block goes all the way to ms->ram_size. @@ -316,13 +345,24 @@ uint64_t riscv_compute_fdt_addr(hwaddr dram_base, hwaddr dram_size, /* * We should put fdt as far as possible to avoid kernel/initrd overwriting - * its content. But it should be addressable by 32 bit system as well. - * Thus, put it at an 2MB aligned address that less than fdt size from the - * end of dram or 3GB whichever is lesser. + * its content. But it should be addressable by 32 bit system as well in RV32. + * Thus, put it near to the end of dram in RV64, and put it near to the end + * of dram or 3GB whichever is lesser in RV32. */ - temp = (dram_base < 3072 * MiB) ? MIN(dram_end, 3072 * MiB) : dram_end; + if (!info->is_32bit) { + temp = dram_end; + } else { + temp = (dram_base < 3072 * MiB) ? MIN(dram_end, 3072 * MiB) : dram_end; + } - return QEMU_ALIGN_DOWN(temp - fdtsize, 2 * MiB); + dtb_start = QEMU_ALIGN_DOWN(temp - fdtsize, 2 * MiB); + + if (dtb_start_limit && (dtb_start < dtb_start_limit)) { + error_report("No enough memory to place DTB after kernel/initrd"); + exit(1); + } + + return dtb_start; } /* @@ -334,35 +374,39 @@ void riscv_load_fdt(hwaddr fdt_addr, void *fdt) uint32_t fdtsize = fdt_totalsize(fdt); /* copy in the device tree */ - qemu_fdt_dumpdtb(fdt, fdtsize); - rom_add_blob_fixed_as("fdt", fdt, fdtsize, fdt_addr, &address_space_memory); qemu_register_reset_nosnapshotload(qemu_fdt_randomize_seeds, rom_ptr_for_as(&address_space_memory, fdt_addr, fdtsize)); } -void riscv_rom_copy_firmware_info(MachineState *machine, hwaddr rom_base, - hwaddr rom_size, uint32_t reset_vec_size, +void riscv_rom_copy_firmware_info(MachineState *machine, + RISCVHartArrayState *harts, + hwaddr rom_base, hwaddr rom_size, + uint32_t reset_vec_size, uint64_t kernel_entry) { + struct fw_dynamic_info32 dinfo32; struct fw_dynamic_info dinfo; size_t dinfo_len; - if (sizeof(dinfo.magic) == 4) { - dinfo.magic = cpu_to_le32(FW_DYNAMIC_INFO_MAGIC_VALUE); - dinfo.version = cpu_to_le32(FW_DYNAMIC_INFO_VERSION); - dinfo.next_mode = cpu_to_le32(FW_DYNAMIC_INFO_NEXT_MODE_S); - dinfo.next_addr = cpu_to_le32(kernel_entry); + if (riscv_is_32bit(harts)) { + dinfo32.magic = cpu_to_le32(FW_DYNAMIC_INFO_MAGIC_VALUE); + dinfo32.version = cpu_to_le32(FW_DYNAMIC_INFO_VERSION); + dinfo32.next_mode = cpu_to_le32(FW_DYNAMIC_INFO_NEXT_MODE_S); + dinfo32.next_addr = cpu_to_le32(kernel_entry); + dinfo32.options = 0; + dinfo32.boot_hart = 0; + dinfo_len = sizeof(dinfo32); } else { dinfo.magic = cpu_to_le64(FW_DYNAMIC_INFO_MAGIC_VALUE); dinfo.version = cpu_to_le64(FW_DYNAMIC_INFO_VERSION); dinfo.next_mode = cpu_to_le64(FW_DYNAMIC_INFO_NEXT_MODE_S); dinfo.next_addr = cpu_to_le64(kernel_entry); + dinfo.options = 0; + dinfo.boot_hart = 0; + dinfo_len = sizeof(dinfo); } - dinfo.options = 0; - dinfo.boot_hart = 0; - dinfo_len = sizeof(dinfo); /** * copy the dynamic firmware info. This information is specific to @@ -374,7 +418,10 @@ void riscv_rom_copy_firmware_info(MachineState *machine, hwaddr rom_base, exit(1); } - rom_add_blob_fixed_as("mrom.finfo", &dinfo, dinfo_len, + rom_add_blob_fixed_as("mrom.finfo", + riscv_is_32bit(harts) ? + (void *)&dinfo32 : (void *)&dinfo, + dinfo_len, rom_base + reset_vec_size, &address_space_memory); } @@ -430,7 +477,9 @@ void riscv_setup_rom_reset_vec(MachineState *machine, RISCVHartArrayState *harts } rom_add_blob_fixed_as("mrom.reset", reset_vec, sizeof(reset_vec), rom_base, &address_space_memory); - riscv_rom_copy_firmware_info(machine, rom_base, rom_size, sizeof(reset_vec), + riscv_rom_copy_firmware_info(machine, harts, + rom_base, rom_size, + sizeof(reset_vec), kernel_entry); } diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build index f872674..c22f3a7 100644 --- a/hw/riscv/meson.build +++ b/hw/riscv/meson.build @@ -10,5 +10,8 @@ riscv_ss.add(when: 'CONFIG_SIFIVE_U', if_true: files('sifive_u.c')) riscv_ss.add(when: 'CONFIG_SPIKE', if_true: files('spike.c')) riscv_ss.add(when: 'CONFIG_MICROCHIP_PFSOC', if_true: files('microchip_pfsoc.c')) riscv_ss.add(when: 'CONFIG_ACPI', if_true: files('virt-acpi-build.c')) +riscv_ss.add(when: 'CONFIG_RISCV_IOMMU', if_true: files( + 'riscv-iommu.c', 'riscv-iommu-pci.c', 'riscv-iommu-sys.c', 'riscv-iommu-hpm.c')) +riscv_ss.add(when: 'CONFIG_MICROBLAZE_V', if_true: files('microblaze-v-generic.c')) hw_arch += {'riscv': riscv_ss} diff --git a/hw/riscv/microblaze-v-generic.c b/hw/riscv/microblaze-v-generic.c new file mode 100644 index 0000000..e863c50 --- /dev/null +++ b/hw/riscv/microblaze-v-generic.c @@ -0,0 +1,189 @@ +/* + * QEMU model of Microblaze V generic board. + * + * based on hw/microblaze/petalogix_ml605_mmu.c + * + * Copyright (c) 2011 Michal Simek <monstr@monstr.eu> + * Copyright (c) 2011 PetaLogix + * Copyright (c) 2009 Edgar E. Iglesias. + * Copyright (C) 2024, Advanced Micro Devices, Inc. + * SPDX-License-Identifier: GPL-2.0-or-later + * + * Written by Sai Pavan Boddu <sai.pavan.boddu@amd.com + * and by Michal Simek <michal.simek@amd.com>. + */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qapi/error.h" +#include "cpu.h" +#include "hw/sysbus.h" +#include "system/system.h" +#include "net/net.h" +#include "hw/boards.h" +#include "hw/char/serial-mm.h" +#include "system/address-spaces.h" +#include "hw/char/xilinx_uartlite.h" +#include "hw/misc/unimp.h" + +#define LMB_BRAM_SIZE (128 * KiB) +#define MEMORY_BASEADDR 0x80000000 +#define INTC_BASEADDR 0x41200000 +#define TIMER_BASEADDR 0x41c00000 +#define TIMER_BASEADDR2 0x41c10000 +#define UARTLITE_BASEADDR 0x40600000 +#define ETHLITE_BASEADDR 0x40e00000 +#define UART16550_BASEADDR 0x44a10000 +#define AXIENET_BASEADDR 0x40c00000 +#define AXIDMA_BASEADDR 0x41e00000 +#define GPIO_BASEADDR 0x40000000 +#define GPIO_BASEADDR2 0x40010000 +#define GPIO_BASEADDR3 0x40020000 +#define I2C_BASEADDR 0x40800000 +#define QSPI_BASEADDR 0x44a00000 + +#define TIMER_IRQ 0 +#define UARTLITE_IRQ 1 +#define UART16550_IRQ 4 +#define ETHLITE_IRQ 5 +#define TIMER_IRQ2 6 +#define AXIENET_IRQ 7 +#define AXIDMA_IRQ1 8 +#define AXIDMA_IRQ0 9 + +static void mb_v_generic_init(MachineState *machine) +{ + ram_addr_t ram_size = machine->ram_size; + DeviceState *dev, *dma, *eth0; + Object *ds, *cs; + int i; + RISCVCPU *cpu; + hwaddr ddr_base = MEMORY_BASEADDR; + MemoryRegion *phys_lmb_bram = g_new(MemoryRegion, 1); + MemoryRegion *phys_ram = g_new(MemoryRegion, 1); + qemu_irq irq[32]; + MemoryRegion *sysmem = get_system_memory(); + + cpu = RISCV_CPU(object_new(machine->cpu_type)); + object_property_set_bool(OBJECT(cpu), "h", false, NULL); + object_property_set_bool(OBJECT(cpu), "d", false, NULL); + qdev_realize(DEVICE(cpu), NULL, &error_abort); + /* Attach emulated BRAM through the LMB. */ + memory_region_init_ram(phys_lmb_bram, NULL, + "mb_v.lmb_bram", LMB_BRAM_SIZE, + &error_fatal); + memory_region_add_subregion(sysmem, 0x00000000, phys_lmb_bram); + + memory_region_init_ram(phys_ram, NULL, "mb_v.ram", + ram_size, &error_fatal); + memory_region_add_subregion(sysmem, ddr_base, phys_ram); + + dev = qdev_new("xlnx.xps-intc"); + qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE); + qdev_prop_set_uint32(dev, "kind-of-intr", + 1 << UARTLITE_IRQ); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, INTC_BASEADDR); + sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, + qdev_get_gpio_in(DEVICE(cpu), 11)); + for (i = 0; i < 32; i++) { + irq[i] = qdev_get_gpio_in(dev, i); + } + + /* Uartlite */ + dev = qdev_new(TYPE_XILINX_UARTLITE); + qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE); + qdev_prop_set_chr(dev, "chardev", serial_hd(0)); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, UARTLITE_BASEADDR); + sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, irq[UARTLITE_IRQ]); + + /* Full uart */ + serial_mm_init(sysmem, UART16550_BASEADDR + 0x1000, 2, + irq[UART16550_IRQ], 115200, serial_hd(1), + DEVICE_LITTLE_ENDIAN); + + /* 2 timers at irq 0 @ 100 Mhz. */ + dev = qdev_new("xlnx.xps-timer"); + qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE); + qdev_prop_set_uint32(dev, "one-timer-only", 0); + qdev_prop_set_uint32(dev, "clock-frequency", 100000000); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, TIMER_BASEADDR); + sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, irq[TIMER_IRQ]); + + /* 2 timers at irq 3 @ 100 Mhz. */ + dev = qdev_new("xlnx.xps-timer"); + qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE); + qdev_prop_set_uint32(dev, "one-timer-only", 0); + qdev_prop_set_uint32(dev, "clock-frequency", 100000000); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, TIMER_BASEADDR2); + sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, irq[TIMER_IRQ2]); + + /* Emaclite */ + dev = qdev_new("xlnx.xps-ethernetlite"); + qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE); + qemu_configure_nic_device(dev, true, NULL); + qdev_prop_set_uint32(dev, "tx-ping-pong", 0); + qdev_prop_set_uint32(dev, "rx-ping-pong", 0); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, ETHLITE_BASEADDR); + sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, irq[ETHLITE_IRQ]); + + /* axi ethernet and dma initialization. */ + eth0 = qdev_new("xlnx.axi-ethernet"); + dma = qdev_new("xlnx.axi-dma"); + + /* FIXME: attach to the sysbus instead */ + object_property_add_child(qdev_get_machine(), "xilinx-eth", OBJECT(eth0)); + object_property_add_child(qdev_get_machine(), "xilinx-dma", OBJECT(dma)); + + ds = object_property_get_link(OBJECT(dma), + "axistream-connected-target", NULL); + cs = object_property_get_link(OBJECT(dma), + "axistream-control-connected-target", NULL); + qemu_configure_nic_device(eth0, true, NULL); + qdev_prop_set_uint32(eth0, "rxmem", 0x1000); + qdev_prop_set_uint32(eth0, "txmem", 0x1000); + object_property_set_link(OBJECT(eth0), "axistream-connected", ds, + &error_abort); + object_property_set_link(OBJECT(eth0), "axistream-control-connected", cs, + &error_abort); + sysbus_realize_and_unref(SYS_BUS_DEVICE(eth0), &error_fatal); + sysbus_mmio_map(SYS_BUS_DEVICE(eth0), 0, AXIENET_BASEADDR); + sysbus_connect_irq(SYS_BUS_DEVICE(eth0), 0, irq[AXIENET_IRQ]); + + ds = object_property_get_link(OBJECT(eth0), + "axistream-connected-target", NULL); + cs = object_property_get_link(OBJECT(eth0), + "axistream-control-connected-target", NULL); + qdev_prop_set_uint32(dma, "freqhz", 100000000); + object_property_set_link(OBJECT(dma), "axistream-connected", ds, + &error_abort); + object_property_set_link(OBJECT(dma), "axistream-control-connected", cs, + &error_abort); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dma), &error_fatal); + sysbus_mmio_map(SYS_BUS_DEVICE(dma), 0, AXIDMA_BASEADDR); + sysbus_connect_irq(SYS_BUS_DEVICE(dma), 0, irq[AXIDMA_IRQ0]); + sysbus_connect_irq(SYS_BUS_DEVICE(dma), 1, irq[AXIDMA_IRQ1]); + + /* unimplemented devices */ + create_unimplemented_device("gpio", GPIO_BASEADDR, 0x10000); + create_unimplemented_device("gpio2", GPIO_BASEADDR2, 0x10000); + create_unimplemented_device("gpio3", GPIO_BASEADDR3, 0x10000); + create_unimplemented_device("i2c", I2C_BASEADDR, 0x10000); + create_unimplemented_device("qspi", QSPI_BASEADDR, 0x10000); +} + +static void mb_v_generic_machine_init(MachineClass *mc) +{ + mc->desc = "AMD Microblaze-V generic platform"; + mc->init = mb_v_generic_init; + mc->min_cpus = 1; + mc->max_cpus = 1; + mc->default_cpu_type = TYPE_RISCV_CPU_BASE; + mc->default_cpus = 1; +} + +DEFINE_MACHINE("amd-microblaze-v-generic", mb_v_generic_machine_init) diff --git a/hw/riscv/microchip_pfsoc.c b/hw/riscv/microchip_pfsoc.c index 7725dfb..2e74783 100644 --- a/hw/riscv/microchip_pfsoc.c +++ b/hw/riscv/microchip_pfsoc.c @@ -39,6 +39,7 @@ #include "qemu/units.h" #include "qemu/cutils.h" #include "qapi/error.h" +#include "qapi/visitor.h" #include "hw/boards.h" #include "hw/loader.h" #include "hw/sysbus.h" @@ -51,8 +52,8 @@ #include "hw/riscv/microchip_pfsoc.h" #include "hw/intc/riscv_aclint.h" #include "hw/intc/sifive_plic.h" -#include "sysemu/device_tree.h" -#include "sysemu/sysemu.h" +#include "system/device_tree.h" +#include "system/system.h" /* * The BIOS image used by this machine is called Hart Software Services (HSS). @@ -61,9 +62,6 @@ #define BIOS_FILENAME "hss.bin" #define RESET_VECTOR 0x20220000 -/* CLINT timebase frequency */ -#define CLINT_TIMEBASE_FREQ 1000000 - /* GEM version */ #define GEM_REVISION 0x0107010c @@ -193,6 +191,7 @@ static void microchip_pfsoc_soc_instance_init(Object *obj) static void microchip_pfsoc_soc_realize(DeviceState *dev, Error **errp) { MachineState *ms = MACHINE(qdev_get_machine()); + MicrochipIcicleKitState *iks = MICROCHIP_ICICLE_KIT_MACHINE(ms); MicrochipPFSoCState *s = MICROCHIP_PFSOC(dev); const MemMapEntry *memmap = microchip_pfsoc_memmap; MemoryRegion *system_memory = get_system_memory(); @@ -253,7 +252,7 @@ static void microchip_pfsoc_soc_realize(DeviceState *dev, Error **errp) memmap[MICROCHIP_PFSOC_CLINT].base + RISCV_ACLINT_SWI_SIZE, RISCV_ACLINT_DEFAULT_MTIMER_SIZE, 0, ms->smp.cpus, RISCV_ACLINT_DEFAULT_MTIMECMP, RISCV_ACLINT_DEFAULT_MTIME, - CLINT_TIMEBASE_FREQ, false); + iks->clint_timebase_freq, false); /* L2 cache controller */ create_unimplemented_device("microchip.pfsoc.l2cc", @@ -479,7 +478,7 @@ static void microchip_pfsoc_soc_realize(DeviceState *dev, Error **errp) qspi_xip_mem); } -static void microchip_pfsoc_soc_class_init(ObjectClass *oc, void *data) +static void microchip_pfsoc_soc_class_init(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); @@ -516,11 +515,11 @@ static void microchip_icicle_kit_machine_init(MachineState *machine) uint64_t mem_low_size, mem_high_size; hwaddr firmware_load_addr; const char *firmware_name; - bool kernel_as_payload = false; target_ulong firmware_end_addr, kernel_start_addr; uint64_t kernel_entry; - uint32_t fdt_load_addr; + uint64_t fdt_load_addr; DriveInfo *dinfo = drive_get(IF_SD, 0, 0); + RISCVBootInfo boot_info; /* Sanity check on RAM size */ if (machine->ram_size < mc->default_ram_size) { @@ -578,65 +577,135 @@ static void microchip_icicle_kit_machine_init(MachineState *machine) } /* - * We follow the following table to select which payload we execute. - * - * -bios | -kernel | payload - * -------+------------+-------- - * N | N | HSS - * Y | don't care | HSS - * N | Y | kernel + * We follow the following table to select which firmware we use. * - * This ensures backwards compatibility with how we used to expose -bios - * to users but allows them to run through direct kernel booting as well. - * - * When -kernel is used for direct boot, -dtb must be present to provide - * a valid device tree for the board, as we don't generate device tree. + * -bios | -kernel | firmware + * --------------+------------+-------- + * none | N | error + * none | Y | kernel + * NULL, default | N | BIOS_FILENAME + * NULL, default | Y | RISCV64_BIOS_BIN + * other | don't care | other */ - - if (machine->kernel_filename && machine->dtb) { - int fdt_size; - machine->fdt = load_device_tree(machine->dtb, &fdt_size); - if (!machine->fdt) { - error_report("load_device_tree() failed"); + if (machine->firmware && !strcmp(machine->firmware, "none")) { + if (!machine->kernel_filename) { + error_report("for -bios none, a kernel is required"); exit(1); } - firmware_name = RISCV64_BIOS_BIN; - firmware_load_addr = memmap[MICROCHIP_PFSOC_DRAM_LO].base; - kernel_as_payload = true; - } - - if (!kernel_as_payload) { - firmware_name = BIOS_FILENAME; + firmware_name = NULL; + firmware_load_addr = RESET_VECTOR; + } else if (!machine->firmware || !strcmp(machine->firmware, "default")) { + if (machine->kernel_filename) { + firmware_name = RISCV64_BIOS_BIN; + firmware_load_addr = memmap[MICROCHIP_PFSOC_DRAM_LO].base; + } else { + firmware_name = BIOS_FILENAME; + firmware_load_addr = RESET_VECTOR; + } + } else { + firmware_name = machine->firmware; firmware_load_addr = RESET_VECTOR; } - /* Load the firmware */ - firmware_end_addr = riscv_find_and_load_firmware(machine, firmware_name, - firmware_load_addr, NULL); + /* Load the firmware if necessary */ + firmware_end_addr = firmware_load_addr; + if (firmware_name) { + char *filename = riscv_find_firmware(firmware_name, NULL); + if (filename) { + firmware_end_addr = riscv_load_firmware(filename, + &firmware_load_addr, NULL); + g_free(filename); + } + } - if (kernel_as_payload) { - kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc.u_cpus, + riscv_boot_info_init(&boot_info, &s->soc.u_cpus); + if (machine->kernel_filename) { + kernel_start_addr = riscv_calc_kernel_start_addr(&boot_info, firmware_end_addr); - kernel_entry = riscv_load_kernel(machine, &s->soc.u_cpus, - kernel_start_addr, true, NULL); + riscv_load_kernel(machine, &boot_info, kernel_start_addr, + true, NULL); + kernel_entry = boot_info.image_low_addr; + + if (machine->dtb) { + int fdt_size; + machine->fdt = load_device_tree(machine->dtb, &fdt_size); + if (!machine->fdt) { + error_report("load_device_tree() failed"); + exit(1); + } + + /* Compute the FDT load address in DRAM */ + hwaddr kernel_ram_base = memmap[MICROCHIP_PFSOC_DRAM_LO].base; + hwaddr kernel_ram_size = memmap[MICROCHIP_PFSOC_DRAM_LO].size; + + if (kernel_entry - kernel_ram_base >= kernel_ram_size) { + kernel_ram_base = memmap[MICROCHIP_PFSOC_DRAM_HI].base; + kernel_ram_size = mem_high_size; + } + + fdt_load_addr = riscv_compute_fdt_addr(kernel_ram_base, kernel_ram_size, + machine, &boot_info); + riscv_load_fdt(fdt_load_addr, machine->fdt); + } else { + warn_report_once("The QEMU microchip-icicle-kit machine does not " + "generate a device tree, so no device tree is " + "being provided to the guest."); + fdt_load_addr = 0; + } - /* Compute the fdt load address in dram */ - fdt_load_addr = riscv_compute_fdt_addr(memmap[MICROCHIP_PFSOC_DRAM_LO].base, - memmap[MICROCHIP_PFSOC_DRAM_LO].size, - machine); - riscv_load_fdt(fdt_load_addr, machine->fdt); + hwaddr start_addr; + if (firmware_name) { + start_addr = firmware_load_addr; + } else { + start_addr = kernel_entry; + } /* Load the reset vector */ - riscv_setup_rom_reset_vec(machine, &s->soc.u_cpus, firmware_load_addr, + riscv_setup_rom_reset_vec(machine, &s->soc.u_cpus, start_addr, memmap[MICROCHIP_PFSOC_ENVM_DATA].base, memmap[MICROCHIP_PFSOC_ENVM_DATA].size, kernel_entry, fdt_load_addr); } } -static void microchip_icicle_kit_machine_class_init(ObjectClass *oc, void *data) +static void microchip_icicle_kit_set_clint_timebase_freq(Object *obj, + Visitor *v, + const char *name, + void *opaque, + Error **errp) +{ + MicrochipIcicleKitState *s = MICROCHIP_ICICLE_KIT_MACHINE(obj); + uint32_t value; + + if (!visit_type_uint32(v, name, &value, errp)) { + return; + } + + s->clint_timebase_freq = value; +} + +static void microchip_icicle_kit_get_clint_timebase_freq(Object *obj, + Visitor *v, + const char *name, + void *opaque, + Error **errp) +{ + MicrochipIcicleKitState *s = MICROCHIP_ICICLE_KIT_MACHINE(obj); + uint32_t value = s->clint_timebase_freq; + + visit_type_uint32(v, name, &value, errp); +} + +static void microchip_icicle_kit_machine_instance_init(Object *obj) +{ + MicrochipIcicleKitState *m = MICROCHIP_ICICLE_KIT_MACHINE(obj); + m->clint_timebase_freq = 1000000; +} + +static void microchip_icicle_kit_machine_class_init(ObjectClass *oc, + const void *data) { MachineClass *mc = MACHINE_CLASS(oc); @@ -647,6 +716,7 @@ static void microchip_icicle_kit_machine_class_init(ObjectClass *oc, void *data) mc->min_cpus = MICROCHIP_PFSOC_MANAGEMENT_CPU_COUNT + 1; mc->default_cpus = mc->min_cpus; mc->default_ram_id = "microchip.icicle.kit.ram"; + mc->auto_create_sdcard = true; /* * Map 513 MiB high memory, the minimum required high memory size, because @@ -656,12 +726,20 @@ static void microchip_icicle_kit_machine_class_init(ObjectClass *oc, void *data) * See memory_tests() in mss_ddr.c in the HSS source code. */ mc->default_ram_size = 1537 * MiB; + + object_class_property_add(oc, "clint-timebase-frequency", "uint32_t", + microchip_icicle_kit_get_clint_timebase_freq, + microchip_icicle_kit_set_clint_timebase_freq, + NULL, NULL); + object_class_property_set_description(oc, "clint-timebase-frequency", + "Set CLINT timebase frequency in Hz."); } static const TypeInfo microchip_icicle_kit_machine_typeinfo = { .name = MACHINE_TYPE_NAME("microchip-icicle-kit"), .parent = TYPE_MACHINE, .class_init = microchip_icicle_kit_machine_class_init, + .instance_init = microchip_icicle_kit_machine_instance_init, .instance_size = sizeof(MicrochipIcicleKitState), }; diff --git a/hw/riscv/numa.c b/hw/riscv/numa.c index cf686f4..7a7b012 100644 --- a/hw/riscv/numa.c +++ b/hw/riscv/numa.c @@ -23,7 +23,7 @@ #include "hw/boards.h" #include "hw/qdev-properties.h" #include "hw/riscv/numa.h" -#include "sysemu/device_tree.h" +#include "system/device_tree.h" static bool numa_enabled(const MachineState *ms) { diff --git a/hw/riscv/opentitan.c b/hw/riscv/opentitan.c index 436503f..d369a8a 100644 --- a/hw/riscv/opentitan.c +++ b/hw/riscv/opentitan.c @@ -27,7 +27,8 @@ #include "hw/misc/unimp.h" #include "hw/riscv/boot.h" #include "qemu/units.h" -#include "sysemu/sysemu.h" +#include "system/system.h" +#include "system/address-spaces.h" /* * This version of the OpenTitan machine currently supports @@ -81,6 +82,7 @@ static void opentitan_machine_init(MachineState *machine) OpenTitanState *s = OPENTITAN_MACHINE(machine); const MemMapEntry *memmap = ibex_memmap; MemoryRegion *sys_mem = get_system_memory(); + RISCVBootInfo boot_info; if (machine->ram_size != mc->default_ram_size) { char *sz = size_to_str(mc->default_ram_size); @@ -98,17 +100,19 @@ static void opentitan_machine_init(MachineState *machine) memmap[IBEX_DEV_RAM].base, machine->ram); if (machine->firmware) { - riscv_load_firmware(machine->firmware, memmap[IBEX_DEV_RAM].base, NULL); + hwaddr firmware_load_addr = memmap[IBEX_DEV_RAM].base; + riscv_load_firmware(machine->firmware, &firmware_load_addr, NULL); } + riscv_boot_info_init(&boot_info, &s->soc.cpus); if (machine->kernel_filename) { - riscv_load_kernel(machine, &s->soc.cpus, + riscv_load_kernel(machine, &boot_info, memmap[IBEX_DEV_RAM].base, false, NULL); } } -static void opentitan_machine_class_init(ObjectClass *oc, void *data) +static void opentitan_machine_class_init(ObjectClass *oc, const void *data) { MachineClass *mc = MACHINE_CLASS(oc); @@ -305,12 +309,11 @@ static void lowrisc_ibex_soc_realize(DeviceState *dev_soc, Error **errp) memmap[IBEX_DEV_IBEX_CFG].base, memmap[IBEX_DEV_IBEX_CFG].size); } -static Property lowrisc_ibex_soc_props[] = { +static const Property lowrisc_ibex_soc_props[] = { DEFINE_PROP_UINT32("resetvec", LowRISCIbexSoCState, resetvec, 0x20000400), - DEFINE_PROP_END_OF_LIST() }; -static void lowrisc_ibex_soc_class_init(ObjectClass *oc, void *data) +static void lowrisc_ibex_soc_class_init(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); diff --git a/hw/riscv/riscv-iommu-bits.h b/hw/riscv/riscv-iommu-bits.h new file mode 100644 index 0000000..1017d73 --- /dev/null +++ b/hw/riscv/riscv-iommu-bits.h @@ -0,0 +1,468 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright © 2022-2023 Rivos Inc. + * Copyright © 2023 FORTH-ICS/CARV + * Copyright © 2023 RISC-V IOMMU Task Group + * + * RISC-V IOMMU - Register Layout and Data Structures. + * + * Based on the IOMMU spec version 1.0, 3/2023 + * https://github.com/riscv-non-isa/riscv-iommu + */ + +#ifndef HW_RISCV_IOMMU_BITS_H +#define HW_RISCV_IOMMU_BITS_H + +#define RISCV_IOMMU_SPEC_DOT_VER 0x010 + +#ifndef GENMASK_ULL +#define GENMASK_ULL(h, l) (((~0ULL) >> (63 - (h) + (l))) << (l)) +#endif + +/* + * struct riscv_iommu_fq_record - Fault/Event Queue Record + * See section 3.2 for more info. + */ +struct riscv_iommu_fq_record { + uint64_t hdr; + uint64_t _reserved; + uint64_t iotval; + uint64_t iotval2; +}; +/* Header fields */ +#define RISCV_IOMMU_FQ_HDR_CAUSE GENMASK_ULL(11, 0) +#define RISCV_IOMMU_FQ_HDR_PID GENMASK_ULL(31, 12) +#define RISCV_IOMMU_FQ_HDR_PV BIT_ULL(32) +#define RISCV_IOMMU_FQ_HDR_TTYPE GENMASK_ULL(39, 34) +#define RISCV_IOMMU_FQ_HDR_DID GENMASK_ULL(63, 40) + +/* + * struct riscv_iommu_pq_record - PCIe Page Request record + * For more infos on the PCIe Page Request queue see chapter 3.3. + */ +struct riscv_iommu_pq_record { + uint64_t hdr; + uint64_t payload; +}; +/* Header fields */ +#define RISCV_IOMMU_PREQ_HDR_PID GENMASK_ULL(31, 12) +#define RISCV_IOMMU_PREQ_HDR_PV BIT_ULL(32) +#define RISCV_IOMMU_PREQ_HDR_PRIV BIT_ULL(33) +#define RISCV_IOMMU_PREQ_HDR_EXEC BIT_ULL(34) +#define RISCV_IOMMU_PREQ_HDR_DID GENMASK_ULL(63, 40) + +/* Payload fields */ +#define RISCV_IOMMU_PREQ_PAYLOAD_R BIT_ULL(0) +#define RISCV_IOMMU_PREQ_PAYLOAD_W BIT_ULL(1) +#define RISCV_IOMMU_PREQ_PAYLOAD_L BIT_ULL(2) +#define RISCV_IOMMU_PREQ_PAYLOAD_M GENMASK_ULL(2, 0) +#define RISCV_IOMMU_PREQ_PRG_INDEX GENMASK_ULL(11, 3) +#define RISCV_IOMMU_PREQ_UADDR GENMASK_ULL(63, 12) + +/* Common field positions */ +#define RISCV_IOMMU_PPN_FIELD GENMASK_ULL(53, 10) +#define RISCV_IOMMU_QUEUE_LOGSZ_FIELD GENMASK_ULL(4, 0) +#define RISCV_IOMMU_QUEUE_INDEX_FIELD GENMASK_ULL(31, 0) +#define RISCV_IOMMU_QUEUE_ENABLE BIT(0) +#define RISCV_IOMMU_QUEUE_INTR_ENABLE BIT(1) +#define RISCV_IOMMU_QUEUE_MEM_FAULT BIT(8) +#define RISCV_IOMMU_QUEUE_OVERFLOW BIT(9) +#define RISCV_IOMMU_QUEUE_ACTIVE BIT(16) +#define RISCV_IOMMU_QUEUE_BUSY BIT(17) +#define RISCV_IOMMU_ATP_PPN_FIELD GENMASK_ULL(43, 0) +#define RISCV_IOMMU_ATP_MODE_FIELD GENMASK_ULL(63, 60) + +/* 5.3 IOMMU Capabilities (64bits) */ +#define RISCV_IOMMU_REG_CAP 0x0000 +#define RISCV_IOMMU_CAP_VERSION GENMASK_ULL(7, 0) +#define RISCV_IOMMU_CAP_SV32 BIT_ULL(8) +#define RISCV_IOMMU_CAP_SV39 BIT_ULL(9) +#define RISCV_IOMMU_CAP_SV48 BIT_ULL(10) +#define RISCV_IOMMU_CAP_SV57 BIT_ULL(11) +#define RISCV_IOMMU_CAP_SV32X4 BIT_ULL(16) +#define RISCV_IOMMU_CAP_SV39X4 BIT_ULL(17) +#define RISCV_IOMMU_CAP_SV48X4 BIT_ULL(18) +#define RISCV_IOMMU_CAP_SV57X4 BIT_ULL(19) +#define RISCV_IOMMU_CAP_MSI_FLAT BIT_ULL(22) +#define RISCV_IOMMU_CAP_MSI_MRIF BIT_ULL(23) +#define RISCV_IOMMU_CAP_ATS BIT_ULL(25) +#define RISCV_IOMMU_CAP_T2GPA BIT_ULL(26) +#define RISCV_IOMMU_CAP_IGS GENMASK_ULL(29, 28) +#define RISCV_IOMMU_CAP_HPM BIT_ULL(30) +#define RISCV_IOMMU_CAP_DBG BIT_ULL(31) +#define RISCV_IOMMU_CAP_PAS GENMASK_ULL(37, 32) +#define RISCV_IOMMU_CAP_PD8 BIT_ULL(38) +#define RISCV_IOMMU_CAP_PD17 BIT_ULL(39) +#define RISCV_IOMMU_CAP_PD20 BIT_ULL(40) + +enum riscv_iommu_igs_modes { + RISCV_IOMMU_CAP_IGS_MSI = 0, + RISCV_IOMMU_CAP_IGS_WSI, + RISCV_IOMMU_CAP_IGS_BOTH +}; + +/* 5.4 Features control register (32bits) */ +#define RISCV_IOMMU_REG_FCTL 0x0008 +#define RISCV_IOMMU_FCTL_BE BIT(0) +#define RISCV_IOMMU_FCTL_WSI BIT(1) +#define RISCV_IOMMU_FCTL_GXL BIT(2) + +/* 5.5 Device-directory-table pointer (64bits) */ +#define RISCV_IOMMU_REG_DDTP 0x0010 +#define RISCV_IOMMU_DDTP_MODE GENMASK_ULL(3, 0) +#define RISCV_IOMMU_DDTP_BUSY BIT_ULL(4) +#define RISCV_IOMMU_DDTP_PPN RISCV_IOMMU_PPN_FIELD + +enum riscv_iommu_ddtp_modes { + RISCV_IOMMU_DDTP_MODE_OFF = 0, + RISCV_IOMMU_DDTP_MODE_BARE = 1, + RISCV_IOMMU_DDTP_MODE_1LVL = 2, + RISCV_IOMMU_DDTP_MODE_2LVL = 3, + RISCV_IOMMU_DDTP_MODE_3LVL = 4, + RISCV_IOMMU_DDTP_MODE_MAX = 4 +}; + +/* 5.6 Command Queue Base (64bits) */ +#define RISCV_IOMMU_REG_CQB 0x0018 +#define RISCV_IOMMU_CQB_LOG2SZ RISCV_IOMMU_QUEUE_LOGSZ_FIELD +#define RISCV_IOMMU_CQB_PPN RISCV_IOMMU_PPN_FIELD + +/* 5.7 Command Queue head (32bits) */ +#define RISCV_IOMMU_REG_CQH 0x0020 + +/* 5.8 Command Queue tail (32bits) */ +#define RISCV_IOMMU_REG_CQT 0x0024 + +/* 5.9 Fault Queue Base (64bits) */ +#define RISCV_IOMMU_REG_FQB 0x0028 +#define RISCV_IOMMU_FQB_LOG2SZ RISCV_IOMMU_QUEUE_LOGSZ_FIELD +#define RISCV_IOMMU_FQB_PPN RISCV_IOMMU_PPN_FIELD + +/* 5.10 Fault Queue Head (32bits) */ +#define RISCV_IOMMU_REG_FQH 0x0030 + +/* 5.11 Fault Queue tail (32bits) */ +#define RISCV_IOMMU_REG_FQT 0x0034 + +/* 5.12 Page Request Queue base (64bits) */ +#define RISCV_IOMMU_REG_PQB 0x0038 +#define RISCV_IOMMU_PQB_LOG2SZ RISCV_IOMMU_QUEUE_LOGSZ_FIELD +#define RISCV_IOMMU_PQB_PPN RISCV_IOMMU_PPN_FIELD + +/* 5.13 Page Request Queue head (32bits) */ +#define RISCV_IOMMU_REG_PQH 0x0040 + +/* 5.14 Page Request Queue tail (32bits) */ +#define RISCV_IOMMU_REG_PQT 0x0044 + +/* 5.15 Command Queue CSR (32bits) */ +#define RISCV_IOMMU_REG_CQCSR 0x0048 +#define RISCV_IOMMU_CQCSR_CQEN RISCV_IOMMU_QUEUE_ENABLE +#define RISCV_IOMMU_CQCSR_CIE RISCV_IOMMU_QUEUE_INTR_ENABLE +#define RISCV_IOMMU_CQCSR_CQMF RISCV_IOMMU_QUEUE_MEM_FAULT +#define RISCV_IOMMU_CQCSR_CMD_TO BIT(9) +#define RISCV_IOMMU_CQCSR_CMD_ILL BIT(10) +#define RISCV_IOMMU_CQCSR_FENCE_W_IP BIT(11) +#define RISCV_IOMMU_CQCSR_CQON RISCV_IOMMU_QUEUE_ACTIVE +#define RISCV_IOMMU_CQCSR_BUSY RISCV_IOMMU_QUEUE_BUSY + +/* 5.16 Fault Queue CSR (32bits) */ +#define RISCV_IOMMU_REG_FQCSR 0x004C +#define RISCV_IOMMU_FQCSR_FQEN RISCV_IOMMU_QUEUE_ENABLE +#define RISCV_IOMMU_FQCSR_FIE RISCV_IOMMU_QUEUE_INTR_ENABLE +#define RISCV_IOMMU_FQCSR_FQMF RISCV_IOMMU_QUEUE_MEM_FAULT +#define RISCV_IOMMU_FQCSR_FQOF RISCV_IOMMU_QUEUE_OVERFLOW +#define RISCV_IOMMU_FQCSR_FQON RISCV_IOMMU_QUEUE_ACTIVE +#define RISCV_IOMMU_FQCSR_BUSY RISCV_IOMMU_QUEUE_BUSY + +/* 5.17 Page Request Queue CSR (32bits) */ +#define RISCV_IOMMU_REG_PQCSR 0x0050 +#define RISCV_IOMMU_PQCSR_PQEN RISCV_IOMMU_QUEUE_ENABLE +#define RISCV_IOMMU_PQCSR_PIE RISCV_IOMMU_QUEUE_INTR_ENABLE +#define RISCV_IOMMU_PQCSR_PQMF RISCV_IOMMU_QUEUE_MEM_FAULT +#define RISCV_IOMMU_PQCSR_PQOF RISCV_IOMMU_QUEUE_OVERFLOW +#define RISCV_IOMMU_PQCSR_PQON RISCV_IOMMU_QUEUE_ACTIVE +#define RISCV_IOMMU_PQCSR_BUSY RISCV_IOMMU_QUEUE_BUSY + +/* 5.18 Interrupt Pending Status (32bits) */ +#define RISCV_IOMMU_REG_IPSR 0x0054 +#define RISCV_IOMMU_IPSR_CIP BIT(0) +#define RISCV_IOMMU_IPSR_FIP BIT(1) +#define RISCV_IOMMU_IPSR_PIP BIT(3) + +enum { + RISCV_IOMMU_INTR_CQ, + RISCV_IOMMU_INTR_FQ, + RISCV_IOMMU_INTR_PM, + RISCV_IOMMU_INTR_PQ, + RISCV_IOMMU_INTR_COUNT +}; + +#define RISCV_IOMMU_IOCOUNT_NUM 31 + +/* 5.19 Performance monitoring counter overflow status (32bits) */ +#define RISCV_IOMMU_REG_IOCOUNTOVF 0x0058 +#define RISCV_IOMMU_IOCOUNTOVF_CY BIT(0) + +/* 5.20 Performance monitoring counter inhibits (32bits) */ +#define RISCV_IOMMU_REG_IOCOUNTINH 0x005C +#define RISCV_IOMMU_IOCOUNTINH_CY BIT(0) + +/* 5.21 Performance monitoring cycles counter (64bits) */ +#define RISCV_IOMMU_REG_IOHPMCYCLES 0x0060 +#define RISCV_IOMMU_IOHPMCYCLES_COUNTER GENMASK_ULL(62, 0) +#define RISCV_IOMMU_IOHPMCYCLES_OVF BIT_ULL(63) + +/* 5.22 Performance monitoring event counters (31 * 64bits) */ +#define RISCV_IOMMU_REG_IOHPMCTR_BASE 0x0068 +#define RISCV_IOMMU_REG_IOHPMCTR(_n) \ + (RISCV_IOMMU_REG_IOHPMCTR_BASE + (_n * 0x8)) + +/* 5.23 Performance monitoring event selectors (31 * 64bits) */ +#define RISCV_IOMMU_REG_IOHPMEVT_BASE 0x0160 +#define RISCV_IOMMU_REG_IOHPMEVT(_n) \ + (RISCV_IOMMU_REG_IOHPMEVT_BASE + (_n * 0x8)) +#define RISCV_IOMMU_IOHPMEVT_EVENT_ID GENMASK_ULL(14, 0) +#define RISCV_IOMMU_IOHPMEVT_DMASK BIT_ULL(15) +#define RISCV_IOMMU_IOHPMEVT_PID_PSCID GENMASK_ULL(35, 16) +#define RISCV_IOMMU_IOHPMEVT_DID_GSCID GENMASK_ULL(59, 36) +#define RISCV_IOMMU_IOHPMEVT_PV_PSCV BIT_ULL(60) +#define RISCV_IOMMU_IOHPMEVT_DV_GSCV BIT_ULL(61) +#define RISCV_IOMMU_IOHPMEVT_IDT BIT_ULL(62) +#define RISCV_IOMMU_IOHPMEVT_OF BIT_ULL(63) + +enum RISCV_IOMMU_HPMEVENT_id { + RISCV_IOMMU_HPMEVENT_INVALID = 0, + RISCV_IOMMU_HPMEVENT_URQ = 1, + RISCV_IOMMU_HPMEVENT_TRQ = 2, + RISCV_IOMMU_HPMEVENT_ATS_RQ = 3, + RISCV_IOMMU_HPMEVENT_TLB_MISS = 4, + RISCV_IOMMU_HPMEVENT_DD_WALK = 5, + RISCV_IOMMU_HPMEVENT_PD_WALK = 6, + RISCV_IOMMU_HPMEVENT_S_VS_WALKS = 7, + RISCV_IOMMU_HPMEVENT_G_WALKS = 8, + RISCV_IOMMU_HPMEVENT_MAX = 9 +}; + +/* 5.24 Translation request IOVA (64bits) */ +#define RISCV_IOMMU_REG_TR_REQ_IOVA 0x0258 + +/* 5.25 Translation request control (64bits) */ +#define RISCV_IOMMU_REG_TR_REQ_CTL 0x0260 +#define RISCV_IOMMU_TR_REQ_CTL_GO_BUSY BIT_ULL(0) +#define RISCV_IOMMU_TR_REQ_CTL_NW BIT_ULL(3) +#define RISCV_IOMMU_TR_REQ_CTL_PID GENMASK_ULL(31, 12) +#define RISCV_IOMMU_TR_REQ_CTL_DID GENMASK_ULL(63, 40) + +/* 5.26 Translation request response (64bits) */ +#define RISCV_IOMMU_REG_TR_RESPONSE 0x0268 +#define RISCV_IOMMU_TR_RESPONSE_FAULT BIT_ULL(0) +#define RISCV_IOMMU_TR_RESPONSE_S BIT_ULL(9) +#define RISCV_IOMMU_TR_RESPONSE_PPN RISCV_IOMMU_PPN_FIELD + +/* 5.27 Interrupt cause to vector (64bits) */ +#define RISCV_IOMMU_REG_ICVEC 0x02F8 +#define RISCV_IOMMU_ICVEC_CIV GENMASK_ULL(3, 0) +#define RISCV_IOMMU_ICVEC_FIV GENMASK_ULL(7, 4) +#define RISCV_IOMMU_ICVEC_PMIV GENMASK_ULL(11, 8) +#define RISCV_IOMMU_ICVEC_PIV GENMASK_ULL(15, 12) + +/* 5.28 MSI Configuration table (32 * 64bits) */ +#define RISCV_IOMMU_REG_MSI_CONFIG 0x0300 + +#define RISCV_IOMMU_REG_SIZE 0x1000 + +#define RISCV_IOMMU_DDTE_VALID BIT_ULL(0) +#define RISCV_IOMMU_DDTE_PPN RISCV_IOMMU_PPN_FIELD + +/* Struct riscv_iommu_dc - Device Context - section 2.1 */ +struct riscv_iommu_dc { + uint64_t tc; + uint64_t iohgatp; + uint64_t ta; + uint64_t fsc; + uint64_t msiptp; + uint64_t msi_addr_mask; + uint64_t msi_addr_pattern; + uint64_t _reserved; +}; + +/* Translation control fields */ +#define RISCV_IOMMU_DC_TC_V BIT_ULL(0) +#define RISCV_IOMMU_DC_TC_EN_ATS BIT_ULL(1) +#define RISCV_IOMMU_DC_TC_EN_PRI BIT_ULL(2) +#define RISCV_IOMMU_DC_TC_T2GPA BIT_ULL(3) +#define RISCV_IOMMU_DC_TC_DTF BIT_ULL(4) +#define RISCV_IOMMU_DC_TC_PDTV BIT_ULL(5) +#define RISCV_IOMMU_DC_TC_PRPR BIT_ULL(6) +#define RISCV_IOMMU_DC_TC_GADE BIT_ULL(7) +#define RISCV_IOMMU_DC_TC_SADE BIT_ULL(8) +#define RISCV_IOMMU_DC_TC_DPE BIT_ULL(9) +#define RISCV_IOMMU_DC_TC_SBE BIT_ULL(10) +#define RISCV_IOMMU_DC_TC_SXL BIT_ULL(11) + +/* Second-stage (aka G-stage) context fields */ +#define RISCV_IOMMU_DC_IOHGATP_PPN RISCV_IOMMU_ATP_PPN_FIELD +#define RISCV_IOMMU_DC_IOHGATP_GSCID GENMASK_ULL(59, 44) +#define RISCV_IOMMU_DC_IOHGATP_MODE RISCV_IOMMU_ATP_MODE_FIELD + +enum riscv_iommu_dc_iohgatp_modes { + RISCV_IOMMU_DC_IOHGATP_MODE_BARE = 0, + RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4 = 8, + RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4 = 8, + RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4 = 9, + RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4 = 10 +}; + +/* Translation attributes fields */ +#define RISCV_IOMMU_DC_TA_PSCID GENMASK_ULL(31, 12) + +/* First-stage context fields */ +#define RISCV_IOMMU_DC_FSC_PPN RISCV_IOMMU_ATP_PPN_FIELD +#define RISCV_IOMMU_DC_FSC_MODE RISCV_IOMMU_ATP_MODE_FIELD + +/* Generic I/O MMU command structure - check section 3.1 */ +struct riscv_iommu_command { + uint64_t dword0; + uint64_t dword1; +}; + +#define RISCV_IOMMU_CMD_OPCODE GENMASK_ULL(6, 0) +#define RISCV_IOMMU_CMD_FUNC GENMASK_ULL(9, 7) + +#define RISCV_IOMMU_CMD_IOTINVAL_OPCODE 1 +#define RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA 0 +#define RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA 1 +#define RISCV_IOMMU_CMD_IOTINVAL_AV BIT_ULL(10) +#define RISCV_IOMMU_CMD_IOTINVAL_PSCID GENMASK_ULL(31, 12) +#define RISCV_IOMMU_CMD_IOTINVAL_PSCV BIT_ULL(32) +#define RISCV_IOMMU_CMD_IOTINVAL_GV BIT_ULL(33) +#define RISCV_IOMMU_CMD_IOTINVAL_GSCID GENMASK_ULL(59, 44) + +#define RISCV_IOMMU_CMD_IOFENCE_OPCODE 2 +#define RISCV_IOMMU_CMD_IOFENCE_FUNC_C 0 +#define RISCV_IOMMU_CMD_IOFENCE_AV BIT_ULL(10) +#define RISCV_IOMMU_CMD_IOFENCE_DATA GENMASK_ULL(63, 32) + +#define RISCV_IOMMU_CMD_IODIR_OPCODE 3 +#define RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT 0 +#define RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT 1 +#define RISCV_IOMMU_CMD_IODIR_PID GENMASK_ULL(31, 12) +#define RISCV_IOMMU_CMD_IODIR_DV BIT_ULL(33) +#define RISCV_IOMMU_CMD_IODIR_DID GENMASK_ULL(63, 40) + +/* 3.1.4 I/O MMU PCIe ATS */ +#define RISCV_IOMMU_CMD_ATS_OPCODE 4 +#define RISCV_IOMMU_CMD_ATS_FUNC_INVAL 0 +#define RISCV_IOMMU_CMD_ATS_FUNC_PRGR 1 +#define RISCV_IOMMU_CMD_ATS_PID GENMASK_ULL(31, 12) +#define RISCV_IOMMU_CMD_ATS_PV BIT_ULL(32) +#define RISCV_IOMMU_CMD_ATS_DSV BIT_ULL(33) +#define RISCV_IOMMU_CMD_ATS_RID GENMASK_ULL(55, 40) +#define RISCV_IOMMU_CMD_ATS_DSEG GENMASK_ULL(63, 56) +/* dword1 is the ATS payload, two different payload types for INVAL and PRGR */ + +/* ATS.PRGR payload */ +#define RISCV_IOMMU_CMD_ATS_PRGR_RESP_CODE GENMASK_ULL(47, 44) + +enum riscv_iommu_dc_fsc_atp_modes { + RISCV_IOMMU_DC_FSC_MODE_BARE = 0, + RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV32 = 8, + RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39 = 8, + RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48 = 9, + RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57 = 10, + RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8 = 1, + RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17 = 2, + RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20 = 3 +}; + +enum riscv_iommu_fq_causes { + RISCV_IOMMU_FQ_CAUSE_INST_FAULT = 1, + RISCV_IOMMU_FQ_CAUSE_RD_ADDR_MISALIGNED = 4, + RISCV_IOMMU_FQ_CAUSE_RD_FAULT = 5, + RISCV_IOMMU_FQ_CAUSE_WR_ADDR_MISALIGNED = 6, + RISCV_IOMMU_FQ_CAUSE_WR_FAULT = 7, + RISCV_IOMMU_FQ_CAUSE_INST_FAULT_S = 12, + RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S = 13, + RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S = 15, + RISCV_IOMMU_FQ_CAUSE_INST_FAULT_VS = 20, + RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS = 21, + RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS = 23, + RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED = 256, + RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT = 257, + RISCV_IOMMU_FQ_CAUSE_DDT_INVALID = 258, + RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED = 259, + RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED = 260, + RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT = 261, + RISCV_IOMMU_FQ_CAUSE_MSI_INVALID = 262, + RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED = 263, + RISCV_IOMMU_FQ_CAUSE_MRIF_FAULT = 264, + RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT = 265, + RISCV_IOMMU_FQ_CAUSE_PDT_INVALID = 266, + RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED = 267, + RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED = 268, + RISCV_IOMMU_FQ_CAUSE_PDT_CORRUPTED = 269, + RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED = 270, + RISCV_IOMMU_FQ_CAUSE_MRIF_CORRUIPTED = 271, + RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR = 272, + RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT = 273, + RISCV_IOMMU_FQ_CAUSE_PT_CORRUPTED = 274 +}; + +/* MSI page table pointer */ +#define RISCV_IOMMU_DC_MSIPTP_PPN RISCV_IOMMU_ATP_PPN_FIELD +#define RISCV_IOMMU_DC_MSIPTP_MODE RISCV_IOMMU_ATP_MODE_FIELD +#define RISCV_IOMMU_DC_MSIPTP_MODE_OFF 0 +#define RISCV_IOMMU_DC_MSIPTP_MODE_FLAT 1 + +/* 2.2 Process Directory Table */ +#define RISCV_IOMMU_PDTE_VALID BIT_ULL(0) +#define RISCV_IOMMU_PDTE_PPN RISCV_IOMMU_PPN_FIELD + +/* Translation attributes fields */ +#define RISCV_IOMMU_PC_TA_V BIT_ULL(0) +#define RISCV_IOMMU_PC_TA_RESERVED GENMASK_ULL(63, 32) + +/* First stage context fields */ +#define RISCV_IOMMU_PC_FSC_PPN RISCV_IOMMU_ATP_PPN_FIELD +#define RISCV_IOMMU_PC_FSC_RESERVED GENMASK_ULL(59, 44) + +enum riscv_iommu_fq_ttypes { + RISCV_IOMMU_FQ_TTYPE_NONE = 0, + RISCV_IOMMU_FQ_TTYPE_UADDR_INST_FETCH = 1, + RISCV_IOMMU_FQ_TTYPE_UADDR_RD = 2, + RISCV_IOMMU_FQ_TTYPE_UADDR_WR = 3, + RISCV_IOMMU_FQ_TTYPE_TADDR_INST_FETCH = 5, + RISCV_IOMMU_FQ_TTYPE_TADDR_RD = 6, + RISCV_IOMMU_FQ_TTYPE_TADDR_WR = 7, + RISCV_IOMMU_FQ_TTYPE_PCIE_ATS_REQ = 8, + RISCV_IOMMU_FW_TTYPE_PCIE_MSG_REQ = 9, +}; + +/* + * struct riscv_iommu_msi_pte - MSI Page Table Entry + */ +struct riscv_iommu_msi_pte { + uint64_t pte; + uint64_t mrif_info; +}; + +/* Fields on pte */ +#define RISCV_IOMMU_MSI_PTE_V BIT_ULL(0) +#define RISCV_IOMMU_MSI_PTE_M GENMASK_ULL(2, 1) + +#define RISCV_IOMMU_MSI_PTE_M_MRIF 1 +#define RISCV_IOMMU_MSI_PTE_M_BASIC 3 + +/* When M == 1 (MRIF mode) */ +#define RISCV_IOMMU_MSI_PTE_MRIF_ADDR GENMASK_ULL(53, 7) +/* When M == 3 (basic mode) */ +#define RISCV_IOMMU_MSI_PTE_PPN RISCV_IOMMU_PPN_FIELD +#define RISCV_IOMMU_MSI_PTE_C BIT_ULL(63) + +/* Fields on mrif_info */ +#define RISCV_IOMMU_MSI_MRIF_NID GENMASK_ULL(9, 0) +#define RISCV_IOMMU_MSI_MRIF_NPPN RISCV_IOMMU_PPN_FIELD +#define RISCV_IOMMU_MSI_MRIF_NID_MSB BIT_ULL(60) + +#endif /* _RISCV_IOMMU_BITS_H_ */ diff --git a/hw/riscv/riscv-iommu-hpm.c b/hw/riscv/riscv-iommu-hpm.c new file mode 100644 index 0000000..c5034bf --- /dev/null +++ b/hw/riscv/riscv-iommu-hpm.c @@ -0,0 +1,381 @@ +/* + * RISC-V IOMMU - Hardware Performance Monitor (HPM) helpers + * + * Copyright (C) 2022-2023 Rivos Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "qemu/timer.h" +#include "cpu_bits.h" +#include "riscv-iommu-hpm.h" +#include "riscv-iommu.h" +#include "riscv-iommu-bits.h" +#include "trace.h" + +/* For now we assume IOMMU HPM frequency to be 1GHz so 1-cycle is of 1-ns. */ +static inline uint64_t get_cycles(void) +{ + return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); +} + +uint64_t riscv_iommu_hpmcycle_read(RISCVIOMMUState *s) +{ + const uint64_t cycle = riscv_iommu_reg_get64( + s, RISCV_IOMMU_REG_IOHPMCYCLES); + const uint32_t inhibit = riscv_iommu_reg_get32( + s, RISCV_IOMMU_REG_IOCOUNTINH); + const uint64_t ctr_prev = s->hpmcycle_prev; + const uint64_t ctr_val = s->hpmcycle_val; + + trace_riscv_iommu_hpm_read(cycle, inhibit, ctr_prev, ctr_val); + + if (get_field(inhibit, RISCV_IOMMU_IOCOUNTINH_CY)) { + /* + * Counter should not increment if inhibit bit is set. We can't really + * stop the QEMU_CLOCK_VIRTUAL, so we just return the last updated + * counter value to indicate that counter was not incremented. + */ + return (ctr_val & RISCV_IOMMU_IOHPMCYCLES_COUNTER) | + (cycle & RISCV_IOMMU_IOHPMCYCLES_OVF); + } + + return (ctr_val + get_cycles() - ctr_prev) | + (cycle & RISCV_IOMMU_IOHPMCYCLES_OVF); +} + +static void hpm_incr_ctr(RISCVIOMMUState *s, uint32_t ctr_idx) +{ + const uint32_t off = ctr_idx << 3; + uint64_t cntr_val; + + cntr_val = ldq_le_p(&s->regs_rw[RISCV_IOMMU_REG_IOHPMCTR_BASE + off]); + stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_IOHPMCTR_BASE + off], cntr_val + 1); + + trace_riscv_iommu_hpm_incr_ctr(cntr_val); + + /* Handle the overflow scenario. */ + if (cntr_val == UINT64_MAX) { + /* + * Generate interrupt only if OF bit is clear. +1 to offset the cycle + * register OF bit. + */ + const uint32_t ovf = + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IOCOUNTOVF, + BIT(ctr_idx + 1), 0); + if (!get_field(ovf, BIT(ctr_idx + 1))) { + riscv_iommu_reg_mod64(s, + RISCV_IOMMU_REG_IOHPMEVT_BASE + off, + RISCV_IOMMU_IOHPMEVT_OF, + 0); + riscv_iommu_notify(s, RISCV_IOMMU_INTR_PM); + } + } +} + +void riscv_iommu_hpm_incr_ctr(RISCVIOMMUState *s, RISCVIOMMUContext *ctx, + unsigned event_id) +{ + const uint32_t inhibit = riscv_iommu_reg_get32( + s, RISCV_IOMMU_REG_IOCOUNTINH); + uint32_t did_gscid; + uint32_t pid_pscid; + uint32_t ctr_idx; + gpointer value; + uint32_t ctrs; + uint64_t evt; + + if (!(s->cap & RISCV_IOMMU_CAP_HPM)) { + return; + } + + value = g_hash_table_lookup(s->hpm_event_ctr_map, + GUINT_TO_POINTER(event_id)); + if (value == NULL) { + return; + } + + for (ctrs = GPOINTER_TO_UINT(value); ctrs != 0; ctrs &= ctrs - 1) { + ctr_idx = ctz32(ctrs); + if (get_field(inhibit, BIT(ctr_idx + 1))) { + continue; + } + + evt = riscv_iommu_reg_get64(s, + RISCV_IOMMU_REG_IOHPMEVT_BASE + (ctr_idx << 3)); + + /* + * It's quite possible that event ID has been changed in counter + * but hashtable hasn't been updated yet. We don't want to increment + * counter for the old event ID. + */ + if (event_id != get_field(evt, RISCV_IOMMU_IOHPMEVT_EVENT_ID)) { + continue; + } + + if (get_field(evt, RISCV_IOMMU_IOHPMEVT_IDT)) { + did_gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID); + pid_pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID); + } else { + did_gscid = ctx->devid; + pid_pscid = ctx->process_id; + } + + if (get_field(evt, RISCV_IOMMU_IOHPMEVT_PV_PSCV)) { + /* + * If the transaction does not have a valid process_id, counter + * increments if device_id matches DID_GSCID. If the transaction + * has a valid process_id, counter increments if device_id + * matches DID_GSCID and process_id matches PID_PSCID. See + * IOMMU Specification, Chapter 5.23. Performance-monitoring + * event selector. + */ + if (ctx->process_id && + get_field(evt, RISCV_IOMMU_IOHPMEVT_PID_PSCID) != pid_pscid) { + continue; + } + } + + if (get_field(evt, RISCV_IOMMU_IOHPMEVT_DV_GSCV)) { + uint32_t mask = ~0; + + if (get_field(evt, RISCV_IOMMU_IOHPMEVT_DMASK)) { + /* + * 1001 1011 mask = GSCID + * 0000 0111 mask = mask ^ (mask + 1) + * 1111 1000 mask = ~mask; + */ + mask = get_field(evt, RISCV_IOMMU_IOHPMEVT_DID_GSCID); + mask = mask ^ (mask + 1); + mask = ~mask; + } + + if ((get_field(evt, RISCV_IOMMU_IOHPMEVT_DID_GSCID) & mask) != + (did_gscid & mask)) { + continue; + } + } + + hpm_incr_ctr(s, ctr_idx); + } +} + +/* Timer callback for cycle counter overflow. */ +void riscv_iommu_hpm_timer_cb(void *priv) +{ + RISCVIOMMUState *s = priv; + const uint32_t inhibit = riscv_iommu_reg_get32( + s, RISCV_IOMMU_REG_IOCOUNTINH); + uint32_t ovf; + + if (get_field(inhibit, RISCV_IOMMU_IOCOUNTINH_CY)) { + return; + } + + if (s->irq_overflow_left > 0) { + uint64_t irq_trigger_at = + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + s->irq_overflow_left; + timer_mod_anticipate_ns(s->hpm_timer, irq_trigger_at); + s->irq_overflow_left = 0; + return; + } + + ovf = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_IOCOUNTOVF); + if (!get_field(ovf, RISCV_IOMMU_IOCOUNTOVF_CY)) { + /* + * We don't need to set hpmcycle_val to zero and update hpmcycle_prev to + * current clock value. The way we calculate iohpmcycs will overflow + * and return the correct value. This avoids the need to synchronize + * timer callback and write callback. + */ + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IOCOUNTOVF, + RISCV_IOMMU_IOCOUNTOVF_CY, 0); + riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_IOHPMCYCLES, + RISCV_IOMMU_IOHPMCYCLES_OVF, 0); + riscv_iommu_notify(s, RISCV_IOMMU_INTR_PM); + } +} + +static void hpm_setup_timer(RISCVIOMMUState *s, uint64_t value) +{ + const uint32_t inhibit = riscv_iommu_reg_get32( + s, RISCV_IOMMU_REG_IOCOUNTINH); + uint64_t overflow_at, overflow_ns; + + if (get_field(inhibit, RISCV_IOMMU_IOCOUNTINH_CY)) { + return; + } + + /* + * We are using INT64_MAX here instead to UINT64_MAX because cycle counter + * has 63-bit precision and INT64_MAX is the maximum it can store. + */ + if (value) { + overflow_ns = INT64_MAX - value + 1; + } else { + overflow_ns = INT64_MAX; + } + + overflow_at = (uint64_t)qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + overflow_ns; + + if (overflow_at > INT64_MAX) { + s->irq_overflow_left = overflow_at - INT64_MAX; + overflow_at = INT64_MAX; + } + + timer_mod_anticipate_ns(s->hpm_timer, overflow_at); +} + +/* Updates the internal cycle counter state when iocntinh:CY is changed. */ +void riscv_iommu_process_iocntinh_cy(RISCVIOMMUState *s, bool prev_cy_inh) +{ + const uint32_t inhibit = riscv_iommu_reg_get32( + s, RISCV_IOMMU_REG_IOCOUNTINH); + + /* We only need to process CY bit toggle. */ + if (!(inhibit ^ prev_cy_inh)) { + return; + } + + trace_riscv_iommu_hpm_iocntinh_cy(prev_cy_inh); + + if (!(inhibit & RISCV_IOMMU_IOCOUNTINH_CY)) { + /* + * Cycle counter is enabled. Just start the timer again and update + * the clock snapshot value to point to the current time to make + * sure iohpmcycles read is correct. + */ + s->hpmcycle_prev = get_cycles(); + hpm_setup_timer(s, s->hpmcycle_val); + } else { + /* + * Cycle counter is disabled. Stop the timer and update the cycle + * counter to record the current value which is last programmed + * value + the cycles passed so far. + */ + s->hpmcycle_val = s->hpmcycle_val + (get_cycles() - s->hpmcycle_prev); + timer_del(s->hpm_timer); + } +} + +void riscv_iommu_process_hpmcycle_write(RISCVIOMMUState *s) +{ + const uint64_t val = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_IOHPMCYCLES); + const uint32_t ovf = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_IOCOUNTOVF); + + trace_riscv_iommu_hpm_cycle_write(ovf, val); + + /* + * Clear OF bit in IOCNTOVF if it's being cleared in IOHPMCYCLES register. + */ + if (get_field(ovf, RISCV_IOMMU_IOCOUNTOVF_CY) && + !get_field(val, RISCV_IOMMU_IOHPMCYCLES_OVF)) { + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IOCOUNTOVF, 0, + RISCV_IOMMU_IOCOUNTOVF_CY); + } + + s->hpmcycle_val = val & ~RISCV_IOMMU_IOHPMCYCLES_OVF; + s->hpmcycle_prev = get_cycles(); + hpm_setup_timer(s, s->hpmcycle_val); +} + +static inline bool check_valid_event_id(unsigned event_id) +{ + return event_id > RISCV_IOMMU_HPMEVENT_INVALID && + event_id < RISCV_IOMMU_HPMEVENT_MAX; +} + +static gboolean hpm_event_equal(gpointer key, gpointer value, gpointer udata) +{ + uint32_t *pair = udata; + + if (GPOINTER_TO_UINT(value) & (1 << pair[0])) { + pair[1] = GPOINTER_TO_UINT(key); + return true; + } + + return false; +} + +/* Caller must check ctr_idx against hpm_ctrs to see if its supported or not. */ +static void update_event_map(RISCVIOMMUState *s, uint64_t value, + uint32_t ctr_idx) +{ + unsigned event_id = get_field(value, RISCV_IOMMU_IOHPMEVT_EVENT_ID); + uint32_t pair[2] = { ctr_idx, RISCV_IOMMU_HPMEVENT_INVALID }; + uint32_t new_value = 1 << ctr_idx; + gpointer data; + + /* + * If EventID field is RISCV_IOMMU_HPMEVENT_INVALID + * remove the current mapping. + */ + if (event_id == RISCV_IOMMU_HPMEVENT_INVALID) { + data = g_hash_table_find(s->hpm_event_ctr_map, hpm_event_equal, pair); + + new_value = GPOINTER_TO_UINT(data) & ~(new_value); + if (new_value != 0) { + g_hash_table_replace(s->hpm_event_ctr_map, + GUINT_TO_POINTER(pair[1]), + GUINT_TO_POINTER(new_value)); + } else { + g_hash_table_remove(s->hpm_event_ctr_map, + GUINT_TO_POINTER(pair[1])); + } + + return; + } + + /* Update the counter mask if the event is already enabled. */ + if (g_hash_table_lookup_extended(s->hpm_event_ctr_map, + GUINT_TO_POINTER(event_id), + NULL, + &data)) { + new_value |= GPOINTER_TO_UINT(data); + } + + g_hash_table_insert(s->hpm_event_ctr_map, + GUINT_TO_POINTER(event_id), + GUINT_TO_POINTER(new_value)); +} + +void riscv_iommu_process_hpmevt_write(RISCVIOMMUState *s, uint32_t evt_reg) +{ + const uint32_t ctr_idx = (evt_reg - RISCV_IOMMU_REG_IOHPMEVT_BASE) >> 3; + const uint32_t ovf = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_IOCOUNTOVF); + uint64_t val = riscv_iommu_reg_get64(s, evt_reg); + + if (ctr_idx >= s->hpm_cntrs) { + return; + } + + trace_riscv_iommu_hpm_evt_write(ctr_idx, ovf, val); + + /* Clear OF bit in IOCNTOVF if it's being cleared in IOHPMEVT register. */ + if (get_field(ovf, BIT(ctr_idx + 1)) && + !get_field(val, RISCV_IOMMU_IOHPMEVT_OF)) { + /* +1 to offset CYCLE register OF bit. */ + riscv_iommu_reg_mod32( + s, RISCV_IOMMU_REG_IOCOUNTOVF, 0, BIT(ctr_idx + 1)); + } + + if (!check_valid_event_id(get_field(val, RISCV_IOMMU_IOHPMEVT_EVENT_ID))) { + /* Reset EventID (WARL) field to invalid. */ + val = set_field(val, RISCV_IOMMU_IOHPMEVT_EVENT_ID, + RISCV_IOMMU_HPMEVENT_INVALID); + riscv_iommu_reg_set64(s, evt_reg, val); + } + + update_event_map(s, val, ctr_idx); +} diff --git a/hw/riscv/riscv-iommu-hpm.h b/hw/riscv/riscv-iommu-hpm.h new file mode 100644 index 0000000..5fc4ef2 --- /dev/null +++ b/hw/riscv/riscv-iommu-hpm.h @@ -0,0 +1,33 @@ +/* + * RISC-V IOMMU - Hardware Performance Monitor (HPM) helpers + * + * Copyright (C) 2022-2023 Rivos Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef HW_RISCV_IOMMU_HPM_H +#define HW_RISCV_IOMMU_HPM_H + +#include "qom/object.h" +#include "hw/riscv/riscv-iommu.h" + +uint64_t riscv_iommu_hpmcycle_read(RISCVIOMMUState *s); +void riscv_iommu_hpm_incr_ctr(RISCVIOMMUState *s, RISCVIOMMUContext *ctx, + unsigned event_id); +void riscv_iommu_hpm_timer_cb(void *priv); +void riscv_iommu_process_iocntinh_cy(RISCVIOMMUState *s, bool prev_cy_inh); +void riscv_iommu_process_hpmcycle_write(RISCVIOMMUState *s); +void riscv_iommu_process_hpmevt_write(RISCVIOMMUState *s, uint32_t evt_reg); + +#endif diff --git a/hw/riscv/riscv-iommu-pci.c b/hw/riscv/riscv-iommu-pci.c new file mode 100644 index 0000000..cdb4a7a --- /dev/null +++ b/hw/riscv/riscv-iommu-pci.c @@ -0,0 +1,217 @@ +/* + * QEMU emulation of an RISC-V IOMMU + * + * Copyright (C) 2022-2023 Rivos Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "exec/target_page.h" +#include "hw/pci/msi.h" +#include "hw/pci/msix.h" +#include "hw/pci/pci_bus.h" +#include "hw/qdev-properties.h" +#include "hw/riscv/riscv_hart.h" +#include "migration/vmstate.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/host-utils.h" +#include "qom/object.h" + +#include "cpu_bits.h" +#include "riscv-iommu.h" +#include "riscv-iommu-bits.h" +#include "trace.h" + +/* RISC-V IOMMU PCI Device Emulation */ +#define RISCV_PCI_CLASS_SYSTEM_IOMMU 0x0806 + +/* + * 4 MSIx vectors for ICVEC, one for MRIF. The spec mentions in + * the "Placement and data flow" section that: + * + * "The interfaces related to recording an incoming MSI in a memory-resident + * interrupt file (MRIF) are implementation-specific. The partitioning of + * responsibility between the IOMMU and the IO bridge for recording the + * incoming MSI in an MRIF and generating the associated notice MSI are + * implementation-specific." + * + * We're making a design decision to create the MSIx for MRIF in the + * IOMMU MSIx emulation. + */ +#define RISCV_IOMMU_PCI_MSIX_VECTORS 5 + +/* + * 4 vectors that can be used by civ, fiv, pmiv and piv. Number of + * vectors is represented by 2^N, where N = number of writable bits + * in each cause. For 4 vectors we'll write 0b11 (3) in each reg. + */ +#define RISCV_IOMMU_PCI_ICVEC_VECTORS 0x3333 + +typedef struct RISCVIOMMUStatePci { + PCIDevice pci; /* Parent PCIe device state */ + uint16_t vendor_id; + uint16_t device_id; + uint8_t revision; + MemoryRegion bar0; /* PCI BAR (including MSI-x config) */ + RISCVIOMMUState iommu; /* common IOMMU state */ +} RISCVIOMMUStatePci; + +/* interrupt delivery callback */ +static void riscv_iommu_pci_notify(RISCVIOMMUState *iommu, unsigned vector) +{ + RISCVIOMMUStatePci *s = container_of(iommu, RISCVIOMMUStatePci, iommu); + + if (msix_enabled(&(s->pci))) { + msix_notify(&(s->pci), vector); + } +} + +static void riscv_iommu_pci_realize(PCIDevice *dev, Error **errp) +{ + RISCVIOMMUStatePci *s = DO_UPCAST(RISCVIOMMUStatePci, pci, dev); + RISCVIOMMUState *iommu = &s->iommu; + uint8_t *pci_conf = dev->config; + Error *err = NULL; + + pci_set_word(pci_conf + PCI_VENDOR_ID, s->vendor_id); + pci_set_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID, s->vendor_id); + pci_set_word(pci_conf + PCI_DEVICE_ID, s->device_id); + pci_set_word(pci_conf + PCI_SUBSYSTEM_ID, s->device_id); + pci_set_byte(pci_conf + PCI_REVISION_ID, s->revision); + + /* Set device id for trace / debug */ + DEVICE(iommu)->id = g_strdup_printf("%02x:%02x.%01x", + pci_dev_bus_num(dev), PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)); + qdev_realize(DEVICE(iommu), NULL, errp); + + memory_region_init(&s->bar0, OBJECT(s), "riscv-iommu-bar0", + QEMU_ALIGN_UP(memory_region_size(&iommu->regs_mr), TARGET_PAGE_SIZE)); + memory_region_add_subregion(&s->bar0, 0, &iommu->regs_mr); + + pcie_endpoint_cap_init(dev, 0); + + pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64, &s->bar0); + + int ret = msix_init(dev, RISCV_IOMMU_PCI_MSIX_VECTORS, + &s->bar0, 0, RISCV_IOMMU_REG_MSI_CONFIG, + &s->bar0, 0, RISCV_IOMMU_REG_MSI_CONFIG + 256, 0, &err); + + if (ret == -ENOTSUP) { + /* + * MSI-x is not supported by the platform. + * Driver should use timer/polling based notification handlers. + */ + warn_report_err(err); + } else if (ret < 0) { + error_propagate(errp, err); + return; + } else { + /* Mark all ICVEC MSIx vectors as used */ + for (int i = 0; i < RISCV_IOMMU_PCI_MSIX_VECTORS; i++) { + msix_vector_use(dev, i); + } + + iommu->notify = riscv_iommu_pci_notify; + } + + PCIBus *bus = pci_device_root_bus(dev); + if (!bus) { + error_setg(errp, "can't find PCIe root port for %02x:%02x.%x", + pci_bus_num(pci_get_bus(dev)), PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn)); + return; + } + + riscv_iommu_pci_setup_iommu(iommu, bus, errp); +} + +static void riscv_iommu_pci_exit(PCIDevice *pci_dev) +{ + pci_setup_iommu(pci_device_root_bus(pci_dev), NULL, NULL); +} + +static const VMStateDescription riscv_iommu_vmstate = { + .name = "riscv-iommu", + .unmigratable = 1 +}; + +static void riscv_iommu_pci_init(Object *obj) +{ + RISCVIOMMUStatePci *s = RISCV_IOMMU_PCI(obj); + RISCVIOMMUState *iommu = &s->iommu; + + object_initialize_child(obj, "iommu", iommu, TYPE_RISCV_IOMMU); + qdev_alias_all_properties(DEVICE(iommu), obj); + + iommu->icvec_avail_vectors = RISCV_IOMMU_PCI_ICVEC_VECTORS; + riscv_iommu_set_cap_igs(iommu, RISCV_IOMMU_CAP_IGS_MSI); +} + +static const Property riscv_iommu_pci_properties[] = { + DEFINE_PROP_UINT16("vendor-id", RISCVIOMMUStatePci, vendor_id, + PCI_VENDOR_ID_REDHAT), + DEFINE_PROP_UINT16("device-id", RISCVIOMMUStatePci, device_id, + PCI_DEVICE_ID_REDHAT_RISCV_IOMMU), + DEFINE_PROP_UINT8("revision", RISCVIOMMUStatePci, revision, 0x01), +}; + +static void riscv_iommu_pci_reset_hold(Object *obj, ResetType type) +{ + RISCVIOMMUStatePci *pci = RISCV_IOMMU_PCI(obj); + RISCVIOMMUState *iommu = &pci->iommu; + + riscv_iommu_reset(iommu); + + trace_riscv_iommu_pci_reset_hold(type); +} + +static void riscv_iommu_pci_class_init(ObjectClass *klass, const void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); + + rc->phases.hold = riscv_iommu_pci_reset_hold; + + k->realize = riscv_iommu_pci_realize; + k->exit = riscv_iommu_pci_exit; + k->class_id = RISCV_PCI_CLASS_SYSTEM_IOMMU; + dc->desc = "RISCV-IOMMU DMA Remapping device"; + dc->vmsd = &riscv_iommu_vmstate; + dc->hotpluggable = false; + dc->user_creatable = true; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + device_class_set_props(dc, riscv_iommu_pci_properties); +} + +static const TypeInfo riscv_iommu_pci = { + .name = TYPE_RISCV_IOMMU_PCI, + .parent = TYPE_PCI_DEVICE, + .class_init = riscv_iommu_pci_class_init, + .instance_init = riscv_iommu_pci_init, + .instance_size = sizeof(RISCVIOMMUStatePci), + .interfaces = (const InterfaceInfo[]) { + { INTERFACE_PCIE_DEVICE }, + { }, + }, +}; + +static void riscv_iommu_register_pci_types(void) +{ + type_register_static(&riscv_iommu_pci); +} + +type_init(riscv_iommu_register_pci_types); diff --git a/hw/riscv/riscv-iommu-sys.c b/hw/riscv/riscv-iommu-sys.c new file mode 100644 index 0000000..e34d00a --- /dev/null +++ b/hw/riscv/riscv-iommu-sys.c @@ -0,0 +1,248 @@ +/* + * QEMU emulation of an RISC-V IOMMU Platform Device + * + * Copyright (C) 2022-2023 Rivos Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "hw/irq.h" +#include "hw/pci/pci_bus.h" +#include "hw/qdev-properties.h" +#include "hw/sysbus.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/host-utils.h" +#include "qemu/module.h" +#include "qom/object.h" +#include "trace.h" + +#include "riscv-iommu.h" + +#define RISCV_IOMMU_SYSDEV_ICVEC_VECTORS 0x3333 + +#define RISCV_IOMMU_PCI_MSIX_VECTORS 5 + +/* RISC-V IOMMU System Platform Device Emulation */ + +struct RISCVIOMMUStateSys { + SysBusDevice parent; + uint64_t addr; + uint32_t base_irq; + DeviceState *irqchip; + RISCVIOMMUState iommu; + + /* Wired int support */ + qemu_irq irqs[RISCV_IOMMU_INTR_COUNT]; + + /* Memory Regions for MSIX table and pending bit entries. */ + MemoryRegion msix_table_mmio; + MemoryRegion msix_pba_mmio; + uint8_t *msix_table; + uint8_t *msix_pba; +}; + +static uint64_t msix_table_mmio_read(void *opaque, hwaddr addr, + unsigned size) +{ + RISCVIOMMUStateSys *s = opaque; + + g_assert(addr + size <= RISCV_IOMMU_PCI_MSIX_VECTORS * PCI_MSIX_ENTRY_SIZE); + return pci_get_long(s->msix_table + addr); +} + +static void msix_table_mmio_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + RISCVIOMMUStateSys *s = opaque; + + g_assert(addr + size <= RISCV_IOMMU_PCI_MSIX_VECTORS * PCI_MSIX_ENTRY_SIZE); + pci_set_long(s->msix_table + addr, val); +} + +static const MemoryRegionOps msix_table_mmio_ops = { + .read = msix_table_mmio_read, + .write = msix_table_mmio_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 8, + }, + .impl = { + .max_access_size = 4, + }, +}; + +static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr, + unsigned size) +{ + RISCVIOMMUStateSys *s = opaque; + + return pci_get_long(s->msix_pba + addr); +} + +static void msix_pba_mmio_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ +} + +static const MemoryRegionOps msix_pba_mmio_ops = { + .read = msix_pba_mmio_read, + .write = msix_pba_mmio_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 8, + }, + .impl = { + .max_access_size = 4, + }, +}; + +static void riscv_iommu_sysdev_init_msi(RISCVIOMMUStateSys *s, + uint32_t n_vectors) +{ + RISCVIOMMUState *iommu = &s->iommu; + uint32_t table_size = n_vectors * PCI_MSIX_ENTRY_SIZE; + uint32_t table_offset = RISCV_IOMMU_REG_MSI_CONFIG; + uint32_t pba_size = QEMU_ALIGN_UP(n_vectors, 64) / 8; + uint32_t pba_offset = RISCV_IOMMU_REG_MSI_CONFIG + 256; + + s->msix_table = g_malloc0(table_size); + s->msix_pba = g_malloc0(pba_size); + + memory_region_init_io(&s->msix_table_mmio, OBJECT(s), &msix_table_mmio_ops, + s, "msix-table", table_size); + memory_region_add_subregion(&iommu->regs_mr, table_offset, + &s->msix_table_mmio); + + memory_region_init_io(&s->msix_pba_mmio, OBJECT(s), &msix_pba_mmio_ops, s, + "msix-pba", pba_size); + memory_region_add_subregion(&iommu->regs_mr, pba_offset, + &s->msix_pba_mmio); +} + +static void riscv_iommu_sysdev_send_MSI(RISCVIOMMUStateSys *s, + uint32_t vector) +{ + uint8_t *table_entry = s->msix_table + vector * PCI_MSIX_ENTRY_SIZE; + uint64_t msi_addr = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR); + uint32_t msi_data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA); + MemTxResult result; + + address_space_stl_le(&address_space_memory, msi_addr, + msi_data, MEMTXATTRS_UNSPECIFIED, &result); + trace_riscv_iommu_sys_msi_sent(vector, msi_addr, msi_data, result); +} + +static void riscv_iommu_sysdev_notify(RISCVIOMMUState *iommu, + unsigned vector) +{ + RISCVIOMMUStateSys *s = container_of(iommu, RISCVIOMMUStateSys, iommu); + uint32_t fctl = riscv_iommu_reg_get32(iommu, RISCV_IOMMU_REG_FCTL); + + if (fctl & RISCV_IOMMU_FCTL_WSI) { + qemu_irq_pulse(s->irqs[vector]); + trace_riscv_iommu_sys_irq_sent(vector); + return; + } + + riscv_iommu_sysdev_send_MSI(s, vector); +} + +static void riscv_iommu_sys_realize(DeviceState *dev, Error **errp) +{ + RISCVIOMMUStateSys *s = RISCV_IOMMU_SYS(dev); + SysBusDevice *sysdev = SYS_BUS_DEVICE(s); + PCIBus *pci_bus; + qemu_irq irq; + + qdev_realize(DEVICE(&s->iommu), NULL, errp); + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->iommu.regs_mr); + if (s->addr) { + sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, s->addr); + } + + pci_bus = (PCIBus *) object_resolve_path_type("", TYPE_PCI_BUS, NULL); + if (pci_bus) { + riscv_iommu_pci_setup_iommu(&s->iommu, pci_bus, errp); + } + + s->iommu.notify = riscv_iommu_sysdev_notify; + + /* 4 IRQs are defined starting from s->base_irq */ + for (int i = 0; i < RISCV_IOMMU_INTR_COUNT; i++) { + sysbus_init_irq(sysdev, &s->irqs[i]); + irq = qdev_get_gpio_in(s->irqchip, s->base_irq + i); + sysbus_connect_irq(sysdev, i, irq); + } + + riscv_iommu_sysdev_init_msi(s, RISCV_IOMMU_PCI_MSIX_VECTORS); +} + +static void riscv_iommu_sys_init(Object *obj) +{ + RISCVIOMMUStateSys *s = RISCV_IOMMU_SYS(obj); + RISCVIOMMUState *iommu = &s->iommu; + + object_initialize_child(obj, "iommu", iommu, TYPE_RISCV_IOMMU); + qdev_alias_all_properties(DEVICE(iommu), obj); + + iommu->icvec_avail_vectors = RISCV_IOMMU_SYSDEV_ICVEC_VECTORS; + riscv_iommu_set_cap_igs(iommu, RISCV_IOMMU_CAP_IGS_BOTH); +} + +static const Property riscv_iommu_sys_properties[] = { + DEFINE_PROP_UINT64("addr", RISCVIOMMUStateSys, addr, 0), + DEFINE_PROP_UINT32("base-irq", RISCVIOMMUStateSys, base_irq, 0), + DEFINE_PROP_LINK("irqchip", RISCVIOMMUStateSys, irqchip, + TYPE_DEVICE, DeviceState *), +}; + +static void riscv_iommu_sys_reset_hold(Object *obj, ResetType type) +{ + RISCVIOMMUStateSys *sys = RISCV_IOMMU_SYS(obj); + RISCVIOMMUState *iommu = &sys->iommu; + + riscv_iommu_reset(iommu); + + trace_riscv_iommu_sys_reset_hold(type); +} + +static void riscv_iommu_sys_class_init(ObjectClass *klass, const void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); + + rc->phases.hold = riscv_iommu_sys_reset_hold; + + dc->realize = riscv_iommu_sys_realize; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + device_class_set_props(dc, riscv_iommu_sys_properties); +} + +static const TypeInfo riscv_iommu_sys = { + .name = TYPE_RISCV_IOMMU_SYS, + .parent = TYPE_SYS_BUS_DEVICE, + .class_init = riscv_iommu_sys_class_init, + .instance_init = riscv_iommu_sys_init, + .instance_size = sizeof(RISCVIOMMUStateSys), +}; + +static void riscv_iommu_register_sys(void) +{ + type_register_static(&riscv_iommu_sys); +} + +type_init(riscv_iommu_register_sys) diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c new file mode 100644 index 0000000..a877e5d --- /dev/null +++ b/hw/riscv/riscv-iommu.c @@ -0,0 +1,2679 @@ +/* + * QEMU emulation of an RISC-V IOMMU + * + * Copyright (C) 2021-2023, Rivos Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "qom/object.h" +#include "exec/target_page.h" +#include "hw/pci/pci_bus.h" +#include "hw/pci/pci_device.h" +#include "hw/qdev-properties.h" +#include "hw/riscv/riscv_hart.h" +#include "migration/vmstate.h" +#include "qapi/error.h" +#include "qemu/timer.h" + +#include "cpu_bits.h" +#include "riscv-iommu.h" +#include "riscv-iommu-bits.h" +#include "riscv-iommu-hpm.h" +#include "trace.h" + +#define LIMIT_CACHE_CTX (1U << 7) +#define LIMIT_CACHE_IOT (1U << 20) + +/* Physical page number coversions */ +#define PPN_PHYS(ppn) ((ppn) << TARGET_PAGE_BITS) +#define PPN_DOWN(phy) ((phy) >> TARGET_PAGE_BITS) + +typedef struct RISCVIOMMUEntry RISCVIOMMUEntry; + +/* Device assigned I/O address space */ +struct RISCVIOMMUSpace { + IOMMUMemoryRegion iova_mr; /* IOVA memory region for attached device */ + AddressSpace iova_as; /* IOVA address space for attached device */ + RISCVIOMMUState *iommu; /* Managing IOMMU device state */ + uint32_t devid; /* Requester identifier, AKA device_id */ + bool notifier; /* IOMMU unmap notifier enabled */ + QLIST_ENTRY(RISCVIOMMUSpace) list; +}; + +typedef enum RISCVIOMMUTransTag { + RISCV_IOMMU_TRANS_TAG_BY, /* Bypass */ + RISCV_IOMMU_TRANS_TAG_SS, /* Single Stage */ + RISCV_IOMMU_TRANS_TAG_VG, /* G-stage only */ + RISCV_IOMMU_TRANS_TAG_VN, /* Nested translation */ +} RISCVIOMMUTransTag; + +/* Address translation cache entry */ +struct RISCVIOMMUEntry { + RISCVIOMMUTransTag tag; /* Translation Tag */ + uint64_t iova:44; /* IOVA Page Number */ + uint64_t pscid:20; /* Process Soft-Context identifier */ + uint64_t phys:44; /* Physical Page Number */ + uint64_t gscid:16; /* Guest Soft-Context identifier */ + uint64_t perm:2; /* IOMMU_RW flags */ +}; + +/* IOMMU index for transactions without process_id specified. */ +#define RISCV_IOMMU_NOPROCID 0 + +static uint8_t riscv_iommu_get_icvec_vector(uint32_t icvec, uint32_t vec_type) +{ + switch (vec_type) { + case RISCV_IOMMU_INTR_CQ: + return icvec & RISCV_IOMMU_ICVEC_CIV; + case RISCV_IOMMU_INTR_FQ: + return (icvec & RISCV_IOMMU_ICVEC_FIV) >> 4; + case RISCV_IOMMU_INTR_PM: + return (icvec & RISCV_IOMMU_ICVEC_PMIV) >> 8; + case RISCV_IOMMU_INTR_PQ: + return (icvec & RISCV_IOMMU_ICVEC_PIV) >> 12; + default: + g_assert_not_reached(); + } +} + +void riscv_iommu_notify(RISCVIOMMUState *s, int vec_type) +{ + uint32_t ipsr, icvec, vector; + + if (!s->notify) { + return; + } + + icvec = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_ICVEC); + ipsr = riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, (1 << vec_type), 0); + + if (!(ipsr & (1 << vec_type))) { + vector = riscv_iommu_get_icvec_vector(icvec, vec_type); + s->notify(s, vector); + trace_riscv_iommu_notify_int_vector(vec_type, vector); + } +} + +static void riscv_iommu_fault(RISCVIOMMUState *s, + struct riscv_iommu_fq_record *ev) +{ + uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR); + uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQH) & s->fq_mask; + uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQT) & s->fq_mask; + uint32_t next = (tail + 1) & s->fq_mask; + uint32_t devid = get_field(ev->hdr, RISCV_IOMMU_FQ_HDR_DID); + + trace_riscv_iommu_flt(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid), + PCI_FUNC(devid), ev->hdr, ev->iotval); + + if (!(ctrl & RISCV_IOMMU_FQCSR_FQON) || + !!(ctrl & (RISCV_IOMMU_FQCSR_FQOF | RISCV_IOMMU_FQCSR_FQMF))) { + return; + } + + if (head == next) { + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, + RISCV_IOMMU_FQCSR_FQOF, 0); + } else { + dma_addr_t addr = s->fq_addr + tail * sizeof(*ev); + if (dma_memory_write(s->target_as, addr, ev, sizeof(*ev), + MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, + RISCV_IOMMU_FQCSR_FQMF, 0); + } else { + riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_FQT, next); + } + } + + if (ctrl & RISCV_IOMMU_FQCSR_FIE) { + riscv_iommu_notify(s, RISCV_IOMMU_INTR_FQ); + } +} + +static void riscv_iommu_pri(RISCVIOMMUState *s, + struct riscv_iommu_pq_record *pr) +{ + uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR); + uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQH) & s->pq_mask; + uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQT) & s->pq_mask; + uint32_t next = (tail + 1) & s->pq_mask; + uint32_t devid = get_field(pr->hdr, RISCV_IOMMU_PREQ_HDR_DID); + + trace_riscv_iommu_pri(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid), + PCI_FUNC(devid), pr->payload); + + if (!(ctrl & RISCV_IOMMU_PQCSR_PQON) || + !!(ctrl & (RISCV_IOMMU_PQCSR_PQOF | RISCV_IOMMU_PQCSR_PQMF))) { + return; + } + + if (head == next) { + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, + RISCV_IOMMU_PQCSR_PQOF, 0); + } else { + dma_addr_t addr = s->pq_addr + tail * sizeof(*pr); + if (dma_memory_write(s->target_as, addr, pr, sizeof(*pr), + MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, + RISCV_IOMMU_PQCSR_PQMF, 0); + } else { + riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_PQT, next); + } + } + + if (ctrl & RISCV_IOMMU_PQCSR_PIE) { + riscv_iommu_notify(s, RISCV_IOMMU_INTR_PQ); + } +} + +/* + * Discards all bits from 'val' whose matching bits in the same + * positions in the mask 'ext' are zeros, and packs the remaining + * bits from 'val' contiguously at the least-significant end of the + * result, keeping the same bit order as 'val' and filling any + * other bits at the most-significant end of the result with zeros. + * + * For example, for the following 'val' and 'ext', the return 'ret' + * will be: + * + * val = a b c d e f g h + * ext = 1 0 1 0 0 1 1 0 + * ret = 0 0 0 0 a c f g + * + * This function, taken from the riscv-iommu 1.0 spec, section 2.3.3 + * "Process to translate addresses of MSIs", is similar to bit manip + * function PEXT (Parallel bits extract) from x86. + */ +static uint64_t riscv_iommu_pext_u64(uint64_t val, uint64_t ext) +{ + uint64_t ret = 0; + uint64_t rot = 1; + + while (ext) { + if (ext & 1) { + if (val & 1) { + ret |= rot; + } + rot <<= 1; + } + val >>= 1; + ext >>= 1; + } + + return ret; +} + +/* Check if GPA matches MSI/MRIF pattern. */ +static bool riscv_iommu_msi_check(RISCVIOMMUState *s, RISCVIOMMUContext *ctx, + dma_addr_t gpa) +{ + if (!s->enable_msi) { + return false; + } + + if (get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE) != + RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) { + return false; /* Invalid MSI/MRIF mode */ + } + + if ((PPN_DOWN(gpa) ^ ctx->msi_addr_pattern) & ~ctx->msi_addr_mask) { + return false; /* GPA not in MSI range defined by AIA IMSIC rules. */ + } + + return true; +} + +/* + * RISCV IOMMU Address Translation Lookup - Page Table Walk + * + * Note: Code is based on get_physical_address() from target/riscv/cpu_helper.c + * Both implementation can be merged into single helper function in future. + * Keeping them separate for now, as error reporting and flow specifics are + * sufficiently different for separate implementation. + * + * @s : IOMMU Device State + * @ctx : Translation context for device id and process address space id. + * @iotlb : translation data: physical address and access mode. + * @return : success or fault cause code. + */ +static int riscv_iommu_spa_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx, + IOMMUTLBEntry *iotlb) +{ + dma_addr_t addr, base; + uint64_t satp, gatp, pte; + bool en_s, en_g; + struct { + unsigned char step; + unsigned char levels; + unsigned char ptidxbits; + unsigned char ptesize; + } sc[2]; + /* Translation stage phase */ + enum { + S_STAGE = 0, + G_STAGE = 1, + } pass; + MemTxResult ret; + + satp = get_field(ctx->satp, RISCV_IOMMU_ATP_MODE_FIELD); + gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD); + + en_s = satp != RISCV_IOMMU_DC_FSC_MODE_BARE; + en_g = gatp != RISCV_IOMMU_DC_IOHGATP_MODE_BARE; + + /* + * Early check for MSI address match when IOVA == GPA. + * Note that the (!en_s) condition means that the MSI + * page table may only be used when guest pages are + * mapped using the g-stage page table, whether single- + * or two-stage paging is enabled. It's unavoidable though, + * because the spec mandates that we do a first-stage + * translation before we check the MSI page table, which + * means we can't do an early MSI check unless we have + * strictly !en_s. + */ + if (!en_s && (iotlb->perm & IOMMU_WO) && + riscv_iommu_msi_check(s, ctx, iotlb->iova)) { + iotlb->target_as = &s->trap_as; + iotlb->translated_addr = iotlb->iova; + iotlb->addr_mask = ~TARGET_PAGE_MASK; + return 0; + } + + /* Exit early for pass-through mode. */ + if (!(en_s || en_g)) { + iotlb->translated_addr = iotlb->iova; + iotlb->addr_mask = ~TARGET_PAGE_MASK; + /* Allow R/W in pass-through mode */ + iotlb->perm = IOMMU_RW; + return 0; + } + + /* S/G translation parameters. */ + for (pass = 0; pass < 2; pass++) { + uint32_t sv_mode; + + sc[pass].step = 0; + if (pass ? (s->fctl & RISCV_IOMMU_FCTL_GXL) : + (ctx->tc & RISCV_IOMMU_DC_TC_SXL)) { + /* 32bit mode for GXL/SXL == 1 */ + switch (pass ? gatp : satp) { + case RISCV_IOMMU_DC_IOHGATP_MODE_BARE: + sc[pass].levels = 0; + sc[pass].ptidxbits = 0; + sc[pass].ptesize = 0; + break; + case RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4: + sv_mode = pass ? RISCV_IOMMU_CAP_SV32X4 : RISCV_IOMMU_CAP_SV32; + if (!(s->cap & sv_mode)) { + return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; + } + sc[pass].levels = 2; + sc[pass].ptidxbits = 10; + sc[pass].ptesize = 4; + break; + default: + return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; + } + } else { + /* 64bit mode for GXL/SXL == 0 */ + switch (pass ? gatp : satp) { + case RISCV_IOMMU_DC_IOHGATP_MODE_BARE: + sc[pass].levels = 0; + sc[pass].ptidxbits = 0; + sc[pass].ptesize = 0; + break; + case RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4: + sv_mode = pass ? RISCV_IOMMU_CAP_SV39X4 : RISCV_IOMMU_CAP_SV39; + if (!(s->cap & sv_mode)) { + return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; + } + sc[pass].levels = 3; + sc[pass].ptidxbits = 9; + sc[pass].ptesize = 8; + break; + case RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4: + sv_mode = pass ? RISCV_IOMMU_CAP_SV48X4 : RISCV_IOMMU_CAP_SV48; + if (!(s->cap & sv_mode)) { + return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; + } + sc[pass].levels = 4; + sc[pass].ptidxbits = 9; + sc[pass].ptesize = 8; + break; + case RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4: + sv_mode = pass ? RISCV_IOMMU_CAP_SV57X4 : RISCV_IOMMU_CAP_SV57; + if (!(s->cap & sv_mode)) { + return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; + } + sc[pass].levels = 5; + sc[pass].ptidxbits = 9; + sc[pass].ptesize = 8; + break; + default: + return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; + } + } + }; + + /* S/G stages translation tables root pointers */ + gatp = PPN_PHYS(get_field(ctx->gatp, RISCV_IOMMU_ATP_PPN_FIELD)); + satp = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_ATP_PPN_FIELD)); + addr = (en_s && en_g) ? satp : iotlb->iova; + base = en_g ? gatp : satp; + pass = en_g ? G_STAGE : S_STAGE; + + do { + const unsigned widened = (pass && !sc[pass].step) ? 2 : 0; + const unsigned va_bits = widened + sc[pass].ptidxbits; + const unsigned va_skip = TARGET_PAGE_BITS + sc[pass].ptidxbits * + (sc[pass].levels - 1 - sc[pass].step); + const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1); + const dma_addr_t pte_addr = base + idx * sc[pass].ptesize; + const bool ade = + ctx->tc & (pass ? RISCV_IOMMU_DC_TC_GADE : RISCV_IOMMU_DC_TC_SADE); + + /* Address range check before first level lookup */ + if (!sc[pass].step) { + const uint64_t va_len = va_skip + va_bits; + const uint64_t va_mask = (1ULL << va_len) - 1; + + if (pass == S_STAGE && va_len > 32) { + target_ulong mask, masked_msbs; + + mask = (1L << (TARGET_LONG_BITS - (va_len - 1))) - 1; + masked_msbs = (addr >> (va_len - 1)) & mask; + + if (masked_msbs != 0 && masked_msbs != mask) { + return (iotlb->perm & IOMMU_WO) ? + RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S : + RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S; + } + } else { + if ((addr & va_mask) != addr) { + return (iotlb->perm & IOMMU_WO) ? + RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS : + RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS; + } + } + } + + + if (pass == S_STAGE) { + riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_S_VS_WALKS); + } else { + riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_G_WALKS); + } + + /* Read page table entry */ + if (sc[pass].ptesize == 4) { + uint32_t pte32 = 0; + ret = ldl_le_dma(s->target_as, pte_addr, &pte32, + MEMTXATTRS_UNSPECIFIED); + pte = pte32; + } else { + ret = ldq_le_dma(s->target_as, pte_addr, &pte, + MEMTXATTRS_UNSPECIFIED); + } + if (ret != MEMTX_OK) { + return (iotlb->perm & IOMMU_WO) ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT + : RISCV_IOMMU_FQ_CAUSE_RD_FAULT; + } + + sc[pass].step++; + hwaddr ppn = pte >> PTE_PPN_SHIFT; + + if (!(pte & PTE_V)) { + break; /* Invalid PTE */ + } else if (!(pte & (PTE_R | PTE_W | PTE_X))) { + base = PPN_PHYS(ppn); /* Inner PTE, continue walking */ + } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) { + break; /* Reserved leaf PTE flags: PTE_W */ + } else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) { + break; /* Reserved leaf PTE flags: PTE_W + PTE_X */ + } else if (ppn & ((1ULL << (va_skip - TARGET_PAGE_BITS)) - 1)) { + break; /* Misaligned PPN */ + } else if ((iotlb->perm & IOMMU_RO) && !(pte & PTE_R)) { + break; /* Read access check failed */ + } else if ((iotlb->perm & IOMMU_WO) && !(pte & PTE_W)) { + break; /* Write access check failed */ + } else if ((iotlb->perm & IOMMU_RO) && !ade && !(pte & PTE_A)) { + break; /* Access bit not set */ + } else if ((iotlb->perm & IOMMU_WO) && !ade && !(pte & PTE_D)) { + break; /* Dirty bit not set */ + } else { + /* Leaf PTE, translation completed. */ + sc[pass].step = sc[pass].levels; + base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1)); + /* Update address mask based on smallest translation granularity */ + iotlb->addr_mask &= (1ULL << va_skip) - 1; + /* Continue with S-Stage translation? */ + if (pass && sc[0].step != sc[0].levels) { + pass = S_STAGE; + addr = iotlb->iova; + continue; + } + /* Translation phase completed (GPA or SPA) */ + iotlb->translated_addr = base; + iotlb->perm = (pte & PTE_W) ? ((pte & PTE_R) ? IOMMU_RW : IOMMU_WO) + : IOMMU_RO; + + /* Check MSI GPA address match */ + if (pass == S_STAGE && (iotlb->perm & IOMMU_WO) && + riscv_iommu_msi_check(s, ctx, base)) { + /* Trap MSI writes and return GPA address. */ + iotlb->target_as = &s->trap_as; + iotlb->addr_mask = ~TARGET_PAGE_MASK; + return 0; + } + + /* Continue with G-Stage translation? */ + if (!pass && en_g) { + pass = G_STAGE; + addr = base; + base = gatp; + sc[pass].step = 0; + continue; + } + + return 0; + } + + if (sc[pass].step == sc[pass].levels) { + break; /* Can't find leaf PTE */ + } + + /* Continue with G-Stage translation? */ + if (!pass && en_g) { + pass = G_STAGE; + addr = base; + base = gatp; + sc[pass].step = 0; + } + } while (1); + + return (iotlb->perm & IOMMU_WO) ? + (pass ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS : + RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S) : + (pass ? RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS : + RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S); +} + +static void riscv_iommu_report_fault(RISCVIOMMUState *s, + RISCVIOMMUContext *ctx, + uint32_t fault_type, uint32_t cause, + bool pv, + uint64_t iotval, uint64_t iotval2) +{ + struct riscv_iommu_fq_record ev = { 0 }; + + if (ctx->tc & RISCV_IOMMU_DC_TC_DTF) { + switch (cause) { + case RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED: + case RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT: + case RISCV_IOMMU_FQ_CAUSE_DDT_INVALID: + case RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED: + case RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED: + case RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR: + case RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT: + break; + default: + /* DTF prevents reporting a fault for this given cause */ + return; + } + } + + ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_CAUSE, cause); + ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_TTYPE, fault_type); + ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_DID, ctx->devid); + ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PV, true); + + if (pv) { + ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PID, ctx->process_id); + } + + ev.iotval = iotval; + ev.iotval2 = iotval2; + + riscv_iommu_fault(s, &ev); +} + +/* Redirect MSI write for given GPA. */ +static MemTxResult riscv_iommu_msi_write(RISCVIOMMUState *s, + RISCVIOMMUContext *ctx, uint64_t gpa, uint64_t data, + unsigned size, MemTxAttrs attrs) +{ + MemTxResult res; + dma_addr_t addr; + uint64_t intn; + uint32_t n190; + uint64_t pte[2]; + int fault_type = RISCV_IOMMU_FQ_TTYPE_UADDR_WR; + int cause; + + /* Interrupt File Number */ + intn = riscv_iommu_pext_u64(PPN_DOWN(gpa), ctx->msi_addr_mask); + if (intn >= 256) { + /* Interrupt file number out of range */ + res = MEMTX_ACCESS_ERROR; + cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; + goto err; + } + + /* fetch MSI PTE */ + addr = PPN_PHYS(get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_PPN)); + addr = addr | (intn * sizeof(pte)); + res = dma_memory_read(s->target_as, addr, &pte, sizeof(pte), + MEMTXATTRS_UNSPECIFIED); + if (res != MEMTX_OK) { + if (res == MEMTX_DECODE_ERROR) { + cause = RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED; + } else { + cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; + } + goto err; + } + + le64_to_cpus(&pte[0]); + le64_to_cpus(&pte[1]); + + if (!(pte[0] & RISCV_IOMMU_MSI_PTE_V) || (pte[0] & RISCV_IOMMU_MSI_PTE_C)) { + /* + * The spec mentions that: "If msipte.C == 1, then further + * processing to interpret the PTE is implementation + * defined.". We'll abort with cause = 262 for this + * case too. + */ + res = MEMTX_ACCESS_ERROR; + cause = RISCV_IOMMU_FQ_CAUSE_MSI_INVALID; + goto err; + } + + switch (get_field(pte[0], RISCV_IOMMU_MSI_PTE_M)) { + case RISCV_IOMMU_MSI_PTE_M_BASIC: + /* MSI Pass-through mode */ + addr = PPN_PHYS(get_field(pte[0], RISCV_IOMMU_MSI_PTE_PPN)); + + trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid), + PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid), + gpa, addr); + + res = dma_memory_write(s->target_as, addr, &data, size, attrs); + if (res != MEMTX_OK) { + cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT; + goto err; + } + + return MEMTX_OK; + case RISCV_IOMMU_MSI_PTE_M_MRIF: + /* MRIF mode, continue. */ + break; + default: + res = MEMTX_ACCESS_ERROR; + cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED; + goto err; + } + + /* + * Report an error for interrupt identities exceeding the maximum allowed + * for an IMSIC interrupt file (2047) or destination address is not 32-bit + * aligned. See IOMMU Specification, Chapter 2.3. MSI page tables. + */ + if ((data > 2047) || (gpa & 3)) { + res = MEMTX_ACCESS_ERROR; + cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED; + goto err; + } + + /* MSI MRIF mode, non atomic pending bit update */ + + /* MRIF pending bit address */ + addr = get_field(pte[0], RISCV_IOMMU_MSI_PTE_MRIF_ADDR) << 9; + addr = addr | ((data & 0x7c0) >> 3); + + trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid), + PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid), + gpa, addr); + + /* MRIF pending bit mask */ + data = 1ULL << (data & 0x03f); + res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs); + if (res != MEMTX_OK) { + cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; + goto err; + } + + intn = intn | data; + res = dma_memory_write(s->target_as, addr, &intn, sizeof(intn), attrs); + if (res != MEMTX_OK) { + cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT; + goto err; + } + + /* Get MRIF enable bits */ + addr = addr + sizeof(intn); + res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs); + if (res != MEMTX_OK) { + cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; + goto err; + } + + if (!(intn & data)) { + /* notification disabled, MRIF update completed. */ + return MEMTX_OK; + } + + /* Send notification message */ + addr = PPN_PHYS(get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NPPN)); + n190 = get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID) | + (get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID_MSB) << 10); + + res = dma_memory_write(s->target_as, addr, &n190, sizeof(n190), attrs); + if (res != MEMTX_OK) { + cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT; + goto err; + } + + trace_riscv_iommu_mrif_notification(s->parent_obj.id, n190, addr); + + return MEMTX_OK; + +err: + riscv_iommu_report_fault(s, ctx, fault_type, cause, + !!ctx->process_id, 0, 0); + return res; +} + +/* + * Check device context configuration as described by the + * riscv-iommu spec section "Device-context configuration + * checks". + */ +static bool riscv_iommu_validate_device_ctx(RISCVIOMMUState *s, + RISCVIOMMUContext *ctx) +{ + uint32_t fsc_mode, msi_mode; + uint64_t gatp; + + if (!(s->cap & RISCV_IOMMU_CAP_ATS) && + (ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS || + ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI || + ctx->tc & RISCV_IOMMU_DC_TC_PRPR)) { + return false; + } + + if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS) && + (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA || + ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI)) { + return false; + } + + if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI) && + ctx->tc & RISCV_IOMMU_DC_TC_PRPR) { + return false; + } + + if (!(s->cap & RISCV_IOMMU_CAP_T2GPA) && + ctx->tc & RISCV_IOMMU_DC_TC_T2GPA) { + return false; + } + + if (s->cap & RISCV_IOMMU_CAP_MSI_FLAT) { + msi_mode = get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE); + + if (msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_OFF && + msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) { + return false; + } + } + + gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD); + if (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA && + gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) { + return false; + } + + fsc_mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE); + + if (ctx->tc & RISCV_IOMMU_DC_TC_PDTV) { + switch (fsc_mode) { + case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8: + if (!(s->cap & RISCV_IOMMU_CAP_PD8)) { + return false; + } + break; + case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17: + if (!(s->cap & RISCV_IOMMU_CAP_PD17)) { + return false; + } + break; + case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20: + if (!(s->cap & RISCV_IOMMU_CAP_PD20)) { + return false; + } + break; + } + } else { + /* DC.tc.PDTV is 0 */ + if (ctx->tc & RISCV_IOMMU_DC_TC_DPE) { + return false; + } + + if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) { + if (fsc_mode == RISCV_IOMMU_CAP_SV32 && + !(s->cap & RISCV_IOMMU_CAP_SV32)) { + return false; + } + } else { + switch (fsc_mode) { + case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39: + if (!(s->cap & RISCV_IOMMU_CAP_SV39)) { + return false; + } + break; + case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48: + if (!(s->cap & RISCV_IOMMU_CAP_SV48)) { + return false; + } + break; + case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57: + if (!(s->cap & RISCV_IOMMU_CAP_SV57)) { + return false; + } + break; + } + } + } + + /* + * CAP_END is always zero (only one endianess). FCTL_BE is + * always zero (little-endian accesses). Thus TC_SBE must + * always be LE, i.e. zero. + */ + if (ctx->tc & RISCV_IOMMU_DC_TC_SBE) { + return false; + } + + return true; +} + +/* + * Validate process context (PC) according to section + * "Process-context configuration checks". + */ +static bool riscv_iommu_validate_process_ctx(RISCVIOMMUState *s, + RISCVIOMMUContext *ctx) +{ + uint32_t mode; + + if (get_field(ctx->ta, RISCV_IOMMU_PC_TA_RESERVED)) { + return false; + } + + if (get_field(ctx->satp, RISCV_IOMMU_PC_FSC_RESERVED)) { + return false; + } + + mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE); + switch (mode) { + case RISCV_IOMMU_DC_FSC_MODE_BARE: + /* sv39 and sv32 modes have the same value (8) */ + case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39: + case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48: + case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57: + break; + default: + return false; + } + + if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) { + if (mode == RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV32 && + !(s->cap & RISCV_IOMMU_CAP_SV32)) { + return false; + } + } else { + switch (mode) { + case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39: + if (!(s->cap & RISCV_IOMMU_CAP_SV39)) { + return false; + } + break; + case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48: + if (!(s->cap & RISCV_IOMMU_CAP_SV48)) { + return false; + } + break; + case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57: + if (!(s->cap & RISCV_IOMMU_CAP_SV57)) { + return false; + } + break; + } + } + + return true; +} + +/* + * RISC-V IOMMU Device Context Loopkup - Device Directory Tree Walk + * + * @s : IOMMU Device State + * @ctx : Device Translation Context with devid and process_id set. + * @return : success or fault code. + */ +static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx) +{ + const uint64_t ddtp = s->ddtp; + unsigned mode = get_field(ddtp, RISCV_IOMMU_DDTP_MODE); + dma_addr_t addr = PPN_PHYS(get_field(ddtp, RISCV_IOMMU_DDTP_PPN)); + struct riscv_iommu_dc dc; + /* Device Context format: 0: extended (64 bytes) | 1: base (32 bytes) */ + const int dc_fmt = !s->enable_msi; + const size_t dc_len = sizeof(dc) >> dc_fmt; + int depth; + uint64_t de; + + switch (mode) { + case RISCV_IOMMU_DDTP_MODE_OFF: + return RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED; + + case RISCV_IOMMU_DDTP_MODE_BARE: + /* mock up pass-through translation context */ + ctx->gatp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD, + RISCV_IOMMU_DC_IOHGATP_MODE_BARE); + ctx->satp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD, + RISCV_IOMMU_DC_FSC_MODE_BARE); + + ctx->tc = RISCV_IOMMU_DC_TC_V; + if (s->enable_ats) { + ctx->tc |= RISCV_IOMMU_DC_TC_EN_ATS; + } + + ctx->ta = 0; + ctx->msiptp = 0; + return 0; + + case RISCV_IOMMU_DDTP_MODE_1LVL: + depth = 0; + break; + + case RISCV_IOMMU_DDTP_MODE_2LVL: + depth = 1; + break; + + case RISCV_IOMMU_DDTP_MODE_3LVL: + depth = 2; + break; + + default: + return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; + } + + /* + * Check supported device id width (in bits). + * See IOMMU Specification, Chapter 6. Software guidelines. + * - if extended device-context format is used: + * 1LVL: 6, 2LVL: 15, 3LVL: 24 + * - if base device-context format is used: + * 1LVL: 7, 2LVL: 16, 3LVL: 24 + */ + if (ctx->devid >= (1 << (depth * 9 + 6 + (dc_fmt && depth != 2)))) { + return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED; + } + + /* Device directory tree walk */ + for (; depth-- > 0; ) { + riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_DD_WALK); + /* + * Select device id index bits based on device directory tree level + * and device context format. + * See IOMMU Specification, Chapter 2. Data Structures. + * - if extended device-context format is used: + * device index: [23:15][14:6][5:0] + * - if base device-context format is used: + * device index: [23:16][15:7][6:0] + */ + const int split = depth * 9 + 6 + dc_fmt; + addr |= ((ctx->devid >> split) << 3) & ~TARGET_PAGE_MASK; + if (dma_memory_read(s->target_as, addr, &de, sizeof(de), + MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { + return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT; + } + le64_to_cpus(&de); + if (!(de & RISCV_IOMMU_DDTE_VALID)) { + /* invalid directory entry */ + return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID; + } + if (de & ~(RISCV_IOMMU_DDTE_PPN | RISCV_IOMMU_DDTE_VALID)) { + /* reserved bits set */ + return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; + } + addr = PPN_PHYS(get_field(de, RISCV_IOMMU_DDTE_PPN)); + } + + riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_DD_WALK); + + /* index into device context entry page */ + addr |= (ctx->devid * dc_len) & ~TARGET_PAGE_MASK; + + memset(&dc, 0, sizeof(dc)); + if (dma_memory_read(s->target_as, addr, &dc, dc_len, + MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { + return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT; + } + + /* Set translation context. */ + ctx->tc = le64_to_cpu(dc.tc); + ctx->gatp = le64_to_cpu(dc.iohgatp); + ctx->satp = le64_to_cpu(dc.fsc); + ctx->ta = le64_to_cpu(dc.ta); + ctx->msiptp = le64_to_cpu(dc.msiptp); + ctx->msi_addr_mask = le64_to_cpu(dc.msi_addr_mask); + ctx->msi_addr_pattern = le64_to_cpu(dc.msi_addr_pattern); + + if (!(ctx->tc & RISCV_IOMMU_DC_TC_V)) { + return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID; + } + + if (!riscv_iommu_validate_device_ctx(s, ctx)) { + return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; + } + + /* FSC field checks */ + mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE); + addr = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_DC_FSC_PPN)); + + if (!(ctx->tc & RISCV_IOMMU_DC_TC_PDTV)) { + if (ctx->process_id != RISCV_IOMMU_NOPROCID) { + /* PID is disabled */ + return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED; + } + if (mode > RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57) { + /* Invalid translation mode */ + return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID; + } + return 0; + } + + if (ctx->process_id == RISCV_IOMMU_NOPROCID) { + if (!(ctx->tc & RISCV_IOMMU_DC_TC_DPE)) { + /* No default process_id enabled, set BARE mode */ + ctx->satp = 0ULL; + return 0; + } else { + /* Use default process_id #0 */ + ctx->process_id = 0; + } + } + + if (mode == RISCV_IOMMU_DC_FSC_MODE_BARE) { + /* No S-Stage translation, done. */ + return 0; + } + + /* FSC.TC.PDTV enabled */ + if (mode > RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20) { + /* Invalid PDTP.MODE */ + return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED; + } + + for (depth = mode - RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8; depth-- > 0; ) { + riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_PD_WALK); + + /* + * Select process id index bits based on process directory tree + * level. See IOMMU Specification, 2.2. Process-Directory-Table. + */ + const int split = depth * 9 + 8; + addr |= ((ctx->process_id >> split) << 3) & ~TARGET_PAGE_MASK; + if (dma_memory_read(s->target_as, addr, &de, sizeof(de), + MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { + return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT; + } + le64_to_cpus(&de); + if (!(de & RISCV_IOMMU_PDTE_VALID)) { + return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID; + } + addr = PPN_PHYS(get_field(de, RISCV_IOMMU_PDTE_PPN)); + } + + riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_PD_WALK); + + /* Leaf entry in PDT */ + addr |= (ctx->process_id << 4) & ~TARGET_PAGE_MASK; + if (dma_memory_read(s->target_as, addr, &dc.ta, sizeof(uint64_t) * 2, + MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { + return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT; + } + + /* Use FSC and TA from process directory entry. */ + ctx->ta = le64_to_cpu(dc.ta); + ctx->satp = le64_to_cpu(dc.fsc); + + if (!(ctx->ta & RISCV_IOMMU_PC_TA_V)) { + return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID; + } + + if (!riscv_iommu_validate_process_ctx(s, ctx)) { + return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED; + } + + return 0; +} + +/* Translation Context cache support */ +static gboolean riscv_iommu_ctx_equal(gconstpointer v1, gconstpointer v2) +{ + RISCVIOMMUContext *c1 = (RISCVIOMMUContext *) v1; + RISCVIOMMUContext *c2 = (RISCVIOMMUContext *) v2; + return c1->devid == c2->devid && + c1->process_id == c2->process_id; +} + +static guint riscv_iommu_ctx_hash(gconstpointer v) +{ + RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) v; + /* + * Generate simple hash of (process_id, devid) + * assuming 24-bit wide devid. + */ + return (guint)(ctx->devid) + ((guint)(ctx->process_id) << 24); +} + +static void riscv_iommu_ctx_inval_devid_procid(gpointer key, gpointer value, + gpointer data) +{ + RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value; + RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data; + if (ctx->tc & RISCV_IOMMU_DC_TC_V && + ctx->devid == arg->devid && + ctx->process_id == arg->process_id) { + ctx->tc &= ~RISCV_IOMMU_DC_TC_V; + } +} + +static void riscv_iommu_ctx_inval_devid(gpointer key, gpointer value, + gpointer data) +{ + RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value; + RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data; + if (ctx->tc & RISCV_IOMMU_DC_TC_V && + ctx->devid == arg->devid) { + ctx->tc &= ~RISCV_IOMMU_DC_TC_V; + } +} + +static void riscv_iommu_ctx_inval_all(gpointer key, gpointer value, + gpointer data) +{ + RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value; + if (ctx->tc & RISCV_IOMMU_DC_TC_V) { + ctx->tc &= ~RISCV_IOMMU_DC_TC_V; + } +} + +static void riscv_iommu_ctx_inval(RISCVIOMMUState *s, GHFunc func, + uint32_t devid, uint32_t process_id) +{ + GHashTable *ctx_cache; + RISCVIOMMUContext key = { + .devid = devid, + .process_id = process_id, + }; + ctx_cache = g_hash_table_ref(s->ctx_cache); + g_hash_table_foreach(ctx_cache, func, &key); + g_hash_table_unref(ctx_cache); +} + +/* Find or allocate translation context for a given {device_id, process_id} */ +static RISCVIOMMUContext *riscv_iommu_ctx(RISCVIOMMUState *s, + unsigned devid, unsigned process_id, + void **ref) +{ + GHashTable *ctx_cache; + RISCVIOMMUContext *ctx; + RISCVIOMMUContext key = { + .devid = devid, + .process_id = process_id, + }; + + ctx_cache = g_hash_table_ref(s->ctx_cache); + ctx = g_hash_table_lookup(ctx_cache, &key); + + if (ctx && (ctx->tc & RISCV_IOMMU_DC_TC_V)) { + *ref = ctx_cache; + return ctx; + } + + ctx = g_new0(RISCVIOMMUContext, 1); + ctx->devid = devid; + ctx->process_id = process_id; + + int fault = riscv_iommu_ctx_fetch(s, ctx); + if (!fault) { + if (g_hash_table_size(ctx_cache) >= LIMIT_CACHE_CTX) { + g_hash_table_unref(ctx_cache); + ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash, + riscv_iommu_ctx_equal, + g_free, NULL); + g_hash_table_ref(ctx_cache); + g_hash_table_unref(qatomic_xchg(&s->ctx_cache, ctx_cache)); + } + g_hash_table_add(ctx_cache, ctx); + *ref = ctx_cache; + return ctx; + } + + g_hash_table_unref(ctx_cache); + *ref = NULL; + + riscv_iommu_report_fault(s, ctx, RISCV_IOMMU_FQ_TTYPE_UADDR_RD, + fault, !!process_id, 0, 0); + + g_free(ctx); + return NULL; +} + +static void riscv_iommu_ctx_put(RISCVIOMMUState *s, void *ref) +{ + if (ref) { + g_hash_table_unref((GHashTable *)ref); + } +} + +/* Find or allocate address space for a given device */ +static AddressSpace *riscv_iommu_space(RISCVIOMMUState *s, uint32_t devid) +{ + RISCVIOMMUSpace *as; + + /* FIXME: PCIe bus remapping for attached endpoints. */ + devid |= s->bus << 8; + + QLIST_FOREACH(as, &s->spaces, list) { + if (as->devid == devid) { + break; + } + } + + if (as == NULL) { + char name[64]; + as = g_new0(RISCVIOMMUSpace, 1); + + as->iommu = s; + as->devid = devid; + + snprintf(name, sizeof(name), "riscv-iommu-%04x:%02x.%d-iova", + PCI_BUS_NUM(as->devid), PCI_SLOT(as->devid), PCI_FUNC(as->devid)); + + /* IOVA address space, untranslated addresses */ + memory_region_init_iommu(&as->iova_mr, sizeof(as->iova_mr), + TYPE_RISCV_IOMMU_MEMORY_REGION, + OBJECT(as), "riscv_iommu", UINT64_MAX); + address_space_init(&as->iova_as, MEMORY_REGION(&as->iova_mr), name); + + QLIST_INSERT_HEAD(&s->spaces, as, list); + + trace_riscv_iommu_new(s->parent_obj.id, PCI_BUS_NUM(as->devid), + PCI_SLOT(as->devid), PCI_FUNC(as->devid)); + } + return &as->iova_as; +} + +/* Translation Object cache support */ +static gboolean riscv_iommu_iot_equal(gconstpointer v1, gconstpointer v2) +{ + RISCVIOMMUEntry *t1 = (RISCVIOMMUEntry *) v1; + RISCVIOMMUEntry *t2 = (RISCVIOMMUEntry *) v2; + return t1->gscid == t2->gscid && t1->pscid == t2->pscid && + t1->iova == t2->iova && t1->tag == t2->tag; +} + +static guint riscv_iommu_iot_hash(gconstpointer v) +{ + RISCVIOMMUEntry *t = (RISCVIOMMUEntry *) v; + return (guint)t->iova; +} + +/* GV: 0 AV: 0 PSCV: 0 GVMA: 0 */ +/* GV: 0 AV: 0 GVMA: 1 */ +static +void riscv_iommu_iot_inval_all(gpointer key, gpointer value, gpointer data) +{ + RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; + RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; + if (iot->tag == arg->tag) { + iot->perm = IOMMU_NONE; + } +} + +/* GV: 0 AV: 0 PSCV: 1 GVMA: 0 */ +static +void riscv_iommu_iot_inval_pscid(gpointer key, gpointer value, gpointer data) +{ + RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; + RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; + if (iot->tag == arg->tag && + iot->pscid == arg->pscid) { + iot->perm = IOMMU_NONE; + } +} + +/* GV: 0 AV: 1 PSCV: 0 GVMA: 0 */ +static +void riscv_iommu_iot_inval_iova(gpointer key, gpointer value, gpointer data) +{ + RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; + RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; + if (iot->tag == arg->tag && + iot->iova == arg->iova) { + iot->perm = IOMMU_NONE; + } +} + +/* GV: 0 AV: 1 PSCV: 1 GVMA: 0 */ +static void riscv_iommu_iot_inval_pscid_iova(gpointer key, gpointer value, + gpointer data) +{ + RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; + RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; + if (iot->tag == arg->tag && + iot->pscid == arg->pscid && + iot->iova == arg->iova) { + iot->perm = IOMMU_NONE; + } +} + +/* GV: 1 AV: 0 PSCV: 0 GVMA: 0 */ +/* GV: 1 AV: 0 GVMA: 1 */ +static +void riscv_iommu_iot_inval_gscid(gpointer key, gpointer value, gpointer data) +{ + RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; + RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; + if (iot->tag == arg->tag && + iot->gscid == arg->gscid) { + iot->perm = IOMMU_NONE; + } +} + +/* GV: 1 AV: 0 PSCV: 1 GVMA: 0 */ +static void riscv_iommu_iot_inval_gscid_pscid(gpointer key, gpointer value, + gpointer data) +{ + RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; + RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; + if (iot->tag == arg->tag && + iot->gscid == arg->gscid && + iot->pscid == arg->pscid) { + iot->perm = IOMMU_NONE; + } +} + +/* GV: 1 AV: 1 PSCV: 0 GVMA: 0 */ +/* GV: 1 AV: 1 GVMA: 1 */ +static void riscv_iommu_iot_inval_gscid_iova(gpointer key, gpointer value, + gpointer data) +{ + RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; + RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; + if (iot->tag == arg->tag && + iot->gscid == arg->gscid && + iot->iova == arg->iova) { + iot->perm = IOMMU_NONE; + } +} + +/* GV: 1 AV: 1 PSCV: 1 GVMA: 0 */ +static void riscv_iommu_iot_inval_gscid_pscid_iova(gpointer key, gpointer value, + gpointer data) +{ + RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; + RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; + if (iot->tag == arg->tag && + iot->gscid == arg->gscid && + iot->pscid == arg->pscid && + iot->iova == arg->iova) { + iot->perm = IOMMU_NONE; + } +} + +/* caller should keep ref-count for iot_cache object */ +static RISCVIOMMUEntry *riscv_iommu_iot_lookup(RISCVIOMMUContext *ctx, + GHashTable *iot_cache, hwaddr iova, RISCVIOMMUTransTag transtag) +{ + RISCVIOMMUEntry key = { + .tag = transtag, + .gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID), + .pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID), + .iova = PPN_DOWN(iova), + }; + return g_hash_table_lookup(iot_cache, &key); +} + +/* caller should keep ref-count for iot_cache object */ +static void riscv_iommu_iot_update(RISCVIOMMUState *s, + GHashTable *iot_cache, RISCVIOMMUEntry *iot) +{ + if (!s->iot_limit) { + return; + } + + if (g_hash_table_size(s->iot_cache) >= s->iot_limit) { + iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash, + riscv_iommu_iot_equal, + g_free, NULL); + g_hash_table_unref(qatomic_xchg(&s->iot_cache, iot_cache)); + } + g_hash_table_add(iot_cache, iot); +} + +static void riscv_iommu_iot_inval(RISCVIOMMUState *s, GHFunc func, + uint32_t gscid, uint32_t pscid, hwaddr iova, RISCVIOMMUTransTag transtag) +{ + GHashTable *iot_cache; + RISCVIOMMUEntry key = { + .tag = transtag, + .gscid = gscid, + .pscid = pscid, + .iova = PPN_DOWN(iova), + }; + + iot_cache = g_hash_table_ref(s->iot_cache); + g_hash_table_foreach(iot_cache, func, &key); + g_hash_table_unref(iot_cache); +} + +static RISCVIOMMUTransTag riscv_iommu_get_transtag(RISCVIOMMUContext *ctx) +{ + uint64_t satp = get_field(ctx->satp, RISCV_IOMMU_ATP_MODE_FIELD); + uint64_t gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD); + + if (satp == RISCV_IOMMU_DC_FSC_MODE_BARE) { + return (gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) ? + RISCV_IOMMU_TRANS_TAG_BY : RISCV_IOMMU_TRANS_TAG_VG; + } else { + return (gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) ? + RISCV_IOMMU_TRANS_TAG_SS : RISCV_IOMMU_TRANS_TAG_VN; + } +} + +static int riscv_iommu_translate(RISCVIOMMUState *s, RISCVIOMMUContext *ctx, + IOMMUTLBEntry *iotlb, bool enable_cache) +{ + RISCVIOMMUTransTag transtag = riscv_iommu_get_transtag(ctx); + RISCVIOMMUEntry *iot; + IOMMUAccessFlags perm; + bool enable_pid; + bool enable_pri; + GHashTable *iot_cache; + int fault; + + riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_URQ); + + iot_cache = g_hash_table_ref(s->iot_cache); + /* + * TC[32] is reserved for custom extensions, used here to temporarily + * enable automatic page-request generation for ATS queries. + */ + enable_pri = (iotlb->perm == IOMMU_NONE) && (ctx->tc & BIT_ULL(32)); + enable_pid = (ctx->tc & RISCV_IOMMU_DC_TC_PDTV); + + /* Check for ATS request. */ + if (iotlb->perm == IOMMU_NONE) { + riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_ATS_RQ); + /* Check if ATS is disabled. */ + if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS)) { + enable_pri = false; + fault = RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED; + goto done; + } + } + + iot = riscv_iommu_iot_lookup(ctx, iot_cache, iotlb->iova, transtag); + perm = iot ? iot->perm : IOMMU_NONE; + if (perm != IOMMU_NONE) { + iotlb->translated_addr = PPN_PHYS(iot->phys); + iotlb->addr_mask = ~TARGET_PAGE_MASK; + iotlb->perm = perm; + fault = 0; + goto done; + } + + riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_TLB_MISS); + + /* Translate using device directory / page table information. */ + fault = riscv_iommu_spa_fetch(s, ctx, iotlb); + + if (!fault && iotlb->target_as == &s->trap_as) { + /* Do not cache trapped MSI translations */ + goto done; + } + + /* + * We made an implementation choice to not cache identity-mapped + * translations, as allowed by the specification, to avoid + * translation cache evictions for other devices sharing the + * IOMMU hardware model. + */ + if (!fault && iotlb->translated_addr != iotlb->iova && enable_cache) { + iot = g_new0(RISCVIOMMUEntry, 1); + iot->iova = PPN_DOWN(iotlb->iova); + iot->phys = PPN_DOWN(iotlb->translated_addr); + iot->gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID); + iot->pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID); + iot->perm = iotlb->perm; + iot->tag = transtag; + riscv_iommu_iot_update(s, iot_cache, iot); + } + +done: + g_hash_table_unref(iot_cache); + + if (enable_pri && fault) { + struct riscv_iommu_pq_record pr = {0}; + if (enable_pid) { + pr.hdr = set_field(RISCV_IOMMU_PREQ_HDR_PV, + RISCV_IOMMU_PREQ_HDR_PID, ctx->process_id); + } + pr.hdr = set_field(pr.hdr, RISCV_IOMMU_PREQ_HDR_DID, ctx->devid); + pr.payload = (iotlb->iova & TARGET_PAGE_MASK) | + RISCV_IOMMU_PREQ_PAYLOAD_M; + riscv_iommu_pri(s, &pr); + return fault; + } + + if (fault) { + unsigned ttype = RISCV_IOMMU_FQ_TTYPE_PCIE_ATS_REQ; + + if (iotlb->perm & IOMMU_RW) { + ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_WR; + } else if (iotlb->perm & IOMMU_RO) { + ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_RD; + } + + riscv_iommu_report_fault(s, ctx, ttype, fault, enable_pid, + iotlb->iova, iotlb->translated_addr); + return fault; + } + + return 0; +} + +/* IOMMU Command Interface */ +static MemTxResult riscv_iommu_iofence(RISCVIOMMUState *s, bool notify, + uint64_t addr, uint32_t data) +{ + /* + * ATS processing in this implementation of the IOMMU is synchronous, + * no need to wait for completions here. + */ + if (!notify) { + return MEMTX_OK; + } + + return dma_memory_write(s->target_as, addr, &data, sizeof(data), + MEMTXATTRS_UNSPECIFIED); +} + +static void riscv_iommu_ats(RISCVIOMMUState *s, + struct riscv_iommu_command *cmd, IOMMUNotifierFlag flag, + IOMMUAccessFlags perm, + void (*trace_fn)(const char *id)) +{ + RISCVIOMMUSpace *as = NULL; + IOMMUNotifier *n; + IOMMUTLBEvent event; + uint32_t pid; + uint32_t devid; + const bool pv = cmd->dword0 & RISCV_IOMMU_CMD_ATS_PV; + + if (cmd->dword0 & RISCV_IOMMU_CMD_ATS_DSV) { + /* Use device segment and requester id */ + devid = get_field(cmd->dword0, + RISCV_IOMMU_CMD_ATS_DSEG | RISCV_IOMMU_CMD_ATS_RID); + } else { + devid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_RID); + } + + pid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_PID); + + QLIST_FOREACH(as, &s->spaces, list) { + if (as->devid == devid) { + break; + } + } + + if (!as || !as->notifier) { + return; + } + + event.type = flag; + event.entry.perm = perm; + event.entry.target_as = s->target_as; + + IOMMU_NOTIFIER_FOREACH(n, &as->iova_mr) { + if (!pv || n->iommu_idx == pid) { + event.entry.iova = n->start; + event.entry.addr_mask = n->end - n->start; + trace_fn(as->iova_mr.parent_obj.name); + memory_region_notify_iommu_one(n, &event); + } + } +} + +static void riscv_iommu_ats_inval(RISCVIOMMUState *s, + struct riscv_iommu_command *cmd) +{ + return riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_DEVIOTLB_UNMAP, IOMMU_NONE, + trace_riscv_iommu_ats_inval); +} + +static void riscv_iommu_ats_prgr(RISCVIOMMUState *s, + struct riscv_iommu_command *cmd) +{ + unsigned resp_code = get_field(cmd->dword1, + RISCV_IOMMU_CMD_ATS_PRGR_RESP_CODE); + + /* Using the access flag to carry response code information */ + IOMMUAccessFlags perm = resp_code ? IOMMU_NONE : IOMMU_RW; + return riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_MAP, perm, + trace_riscv_iommu_ats_prgr); +} + +static void riscv_iommu_process_ddtp(RISCVIOMMUState *s) +{ + uint64_t old_ddtp = s->ddtp; + uint64_t new_ddtp = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_DDTP); + unsigned new_mode = get_field(new_ddtp, RISCV_IOMMU_DDTP_MODE); + unsigned old_mode = get_field(old_ddtp, RISCV_IOMMU_DDTP_MODE); + bool ok = false; + + /* + * Check for allowed DDTP.MODE transitions: + * {OFF, BARE} -> {OFF, BARE, 1LVL, 2LVL, 3LVL} + * {1LVL, 2LVL, 3LVL} -> {OFF, BARE} + */ + if (new_mode == old_mode || + new_mode == RISCV_IOMMU_DDTP_MODE_OFF || + new_mode == RISCV_IOMMU_DDTP_MODE_BARE) { + ok = true; + } else if (new_mode == RISCV_IOMMU_DDTP_MODE_1LVL || + new_mode == RISCV_IOMMU_DDTP_MODE_2LVL || + new_mode == RISCV_IOMMU_DDTP_MODE_3LVL) { + ok = old_mode == RISCV_IOMMU_DDTP_MODE_OFF || + old_mode == RISCV_IOMMU_DDTP_MODE_BARE; + } + + if (ok) { + /* clear reserved and busy bits, report back sanitized version */ + new_ddtp = set_field(new_ddtp & RISCV_IOMMU_DDTP_PPN, + RISCV_IOMMU_DDTP_MODE, new_mode); + } else { + new_ddtp = old_ddtp; + } + s->ddtp = new_ddtp; + + riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, new_ddtp); +} + +/* Command function and opcode field. */ +#define RISCV_IOMMU_CMD(func, op) (((func) << 7) | (op)) + +static void riscv_iommu_process_cq_tail(RISCVIOMMUState *s) +{ + struct riscv_iommu_command cmd; + MemTxResult res; + dma_addr_t addr; + uint32_t tail, head, ctrl; + uint64_t cmd_opcode; + GHFunc func; + + ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR); + tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQT) & s->cq_mask; + head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQH) & s->cq_mask; + + /* Check for pending error or queue processing disabled */ + if (!(ctrl & RISCV_IOMMU_CQCSR_CQON) || + !!(ctrl & (RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CQMF))) { + return; + } + + while (tail != head) { + addr = s->cq_addr + head * sizeof(cmd); + res = dma_memory_read(s->target_as, addr, &cmd, sizeof(cmd), + MEMTXATTRS_UNSPECIFIED); + + if (res != MEMTX_OK) { + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, + RISCV_IOMMU_CQCSR_CQMF, 0); + goto fault; + } + + trace_riscv_iommu_cmd(s->parent_obj.id, cmd.dword0, cmd.dword1); + + cmd_opcode = get_field(cmd.dword0, + RISCV_IOMMU_CMD_OPCODE | RISCV_IOMMU_CMD_FUNC); + + switch (cmd_opcode) { + case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOFENCE_FUNC_C, + RISCV_IOMMU_CMD_IOFENCE_OPCODE): + res = riscv_iommu_iofence(s, + cmd.dword0 & RISCV_IOMMU_CMD_IOFENCE_AV, cmd.dword1 << 2, + get_field(cmd.dword0, RISCV_IOMMU_CMD_IOFENCE_DATA)); + + if (res != MEMTX_OK) { + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, + RISCV_IOMMU_CQCSR_CQMF, 0); + goto fault; + } + break; + + case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA, + RISCV_IOMMU_CMD_IOTINVAL_OPCODE): + { + bool gv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV); + bool av = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV); + bool pscv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV); + uint32_t gscid = get_field(cmd.dword0, + RISCV_IOMMU_CMD_IOTINVAL_GSCID); + uint32_t pscid = get_field(cmd.dword0, + RISCV_IOMMU_CMD_IOTINVAL_PSCID); + hwaddr iova = (cmd.dword1 << 2) & TARGET_PAGE_MASK; + + if (pscv) { + /* illegal command arguments IOTINVAL.GVMA & PSCV == 1 */ + goto cmd_ill; + } + + func = riscv_iommu_iot_inval_all; + + if (gv) { + func = (av) ? riscv_iommu_iot_inval_gscid_iova : + riscv_iommu_iot_inval_gscid; + } + + riscv_iommu_iot_inval( + s, func, gscid, pscid, iova, RISCV_IOMMU_TRANS_TAG_VG); + + riscv_iommu_iot_inval( + s, func, gscid, pscid, iova, RISCV_IOMMU_TRANS_TAG_VN); + break; + } + + case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA, + RISCV_IOMMU_CMD_IOTINVAL_OPCODE): + { + bool gv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV); + bool av = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV); + bool pscv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV); + uint32_t gscid = get_field(cmd.dword0, + RISCV_IOMMU_CMD_IOTINVAL_GSCID); + uint32_t pscid = get_field(cmd.dword0, + RISCV_IOMMU_CMD_IOTINVAL_PSCID); + hwaddr iova = (cmd.dword1 << 2) & TARGET_PAGE_MASK; + RISCVIOMMUTransTag transtag; + + if (gv) { + transtag = RISCV_IOMMU_TRANS_TAG_VN; + if (pscv) { + func = (av) ? riscv_iommu_iot_inval_gscid_pscid_iova : + riscv_iommu_iot_inval_gscid_pscid; + } else { + func = (av) ? riscv_iommu_iot_inval_gscid_iova : + riscv_iommu_iot_inval_gscid; + } + } else { + transtag = RISCV_IOMMU_TRANS_TAG_SS; + if (pscv) { + func = (av) ? riscv_iommu_iot_inval_pscid_iova : + riscv_iommu_iot_inval_pscid; + } else { + func = (av) ? riscv_iommu_iot_inval_iova : + riscv_iommu_iot_inval_all; + } + } + + riscv_iommu_iot_inval(s, func, gscid, pscid, iova, transtag); + break; + } + + case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT, + RISCV_IOMMU_CMD_IODIR_OPCODE): + if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) { + /* invalidate all device context cache mappings */ + func = riscv_iommu_ctx_inval_all; + } else { + /* invalidate all device context matching DID */ + func = riscv_iommu_ctx_inval_devid; + } + riscv_iommu_ctx_inval(s, func, + get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID), 0); + break; + + case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT, + RISCV_IOMMU_CMD_IODIR_OPCODE): + if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) { + /* illegal command arguments IODIR_PDT & DV == 0 */ + goto cmd_ill; + } else { + func = riscv_iommu_ctx_inval_devid_procid; + } + riscv_iommu_ctx_inval(s, func, + get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID), + get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_PID)); + break; + + /* ATS commands */ + case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_INVAL, + RISCV_IOMMU_CMD_ATS_OPCODE): + if (!s->enable_ats) { + goto cmd_ill; + } + + riscv_iommu_ats_inval(s, &cmd); + break; + + case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_PRGR, + RISCV_IOMMU_CMD_ATS_OPCODE): + if (!s->enable_ats) { + goto cmd_ill; + } + + riscv_iommu_ats_prgr(s, &cmd); + break; + + default: + cmd_ill: + /* Invalid instruction, do not advance instruction index. */ + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, + RISCV_IOMMU_CQCSR_CMD_ILL, 0); + goto fault; + } + + /* Advance and update head pointer after command completes. */ + head = (head + 1) & s->cq_mask; + riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_CQH, head); + } + return; + +fault: + if (ctrl & RISCV_IOMMU_CQCSR_CIE) { + riscv_iommu_notify(s, RISCV_IOMMU_INTR_CQ); + } +} + +static void riscv_iommu_process_cq_control(RISCVIOMMUState *s) +{ + uint64_t base; + uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR); + uint32_t ctrl_clr; + bool enable = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQEN); + bool active = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQON); + + if (enable && !active) { + base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_CQB); + s->cq_mask = (2ULL << get_field(base, RISCV_IOMMU_CQB_LOG2SZ)) - 1; + s->cq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_CQB_PPN)); + stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~s->cq_mask); + stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQH], 0); + stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQT], 0); + ctrl_set = RISCV_IOMMU_CQCSR_CQON; + ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQMF | + RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CMD_TO | + RISCV_IOMMU_CQCSR_FENCE_W_IP; + } else if (!enable && active) { + stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~0); + ctrl_set = 0; + ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQON; + } else { + ctrl_set = 0; + ctrl_clr = RISCV_IOMMU_CQCSR_BUSY; + } + + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, ctrl_set, ctrl_clr); +} + +static void riscv_iommu_process_fq_control(RISCVIOMMUState *s) +{ + uint64_t base; + uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR); + uint32_t ctrl_clr; + bool enable = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQEN); + bool active = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQON); + + if (enable && !active) { + base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_FQB); + s->fq_mask = (2ULL << get_field(base, RISCV_IOMMU_FQB_LOG2SZ)) - 1; + s->fq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_FQB_PPN)); + stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~s->fq_mask); + stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQH], 0); + stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQT], 0); + ctrl_set = RISCV_IOMMU_FQCSR_FQON; + ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQMF | + RISCV_IOMMU_FQCSR_FQOF; + } else if (!enable && active) { + stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~0); + ctrl_set = 0; + ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQON; + } else { + ctrl_set = 0; + ctrl_clr = RISCV_IOMMU_FQCSR_BUSY; + } + + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, ctrl_set, ctrl_clr); +} + +static void riscv_iommu_process_pq_control(RISCVIOMMUState *s) +{ + uint64_t base; + uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR); + uint32_t ctrl_clr; + bool enable = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQEN); + bool active = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQON); + + if (enable && !active) { + base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_PQB); + s->pq_mask = (2ULL << get_field(base, RISCV_IOMMU_PQB_LOG2SZ)) - 1; + s->pq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_PQB_PPN)); + stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~s->pq_mask); + stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQH], 0); + stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQT], 0); + ctrl_set = RISCV_IOMMU_PQCSR_PQON; + ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQMF | + RISCV_IOMMU_PQCSR_PQOF; + } else if (!enable && active) { + stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~0); + ctrl_set = 0; + ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQON; + } else { + ctrl_set = 0; + ctrl_clr = RISCV_IOMMU_PQCSR_BUSY; + } + + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, ctrl_set, ctrl_clr); +} + +static void riscv_iommu_process_dbg(RISCVIOMMUState *s) +{ + uint64_t iova = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_IOVA); + uint64_t ctrl = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_CTL); + unsigned devid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_DID); + unsigned pid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_PID); + RISCVIOMMUContext *ctx; + void *ref; + + if (!(ctrl & RISCV_IOMMU_TR_REQ_CTL_GO_BUSY)) { + return; + } + + ctx = riscv_iommu_ctx(s, devid, pid, &ref); + if (ctx == NULL) { + riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE, + RISCV_IOMMU_TR_RESPONSE_FAULT | + (RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED << 10)); + } else { + IOMMUTLBEntry iotlb = { + .iova = iova, + .perm = ctrl & RISCV_IOMMU_TR_REQ_CTL_NW ? IOMMU_RO : IOMMU_RW, + .addr_mask = ~0, + .target_as = NULL, + }; + int fault = riscv_iommu_translate(s, ctx, &iotlb, false); + if (fault) { + iova = RISCV_IOMMU_TR_RESPONSE_FAULT | (((uint64_t) fault) << 10); + } else { + iova = iotlb.translated_addr & ~iotlb.addr_mask; + iova >>= TARGET_PAGE_BITS; + iova &= RISCV_IOMMU_TR_RESPONSE_PPN; + + /* We do not support superpages (> 4kbs) for now */ + iova &= ~RISCV_IOMMU_TR_RESPONSE_S; + } + riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE, iova); + } + + riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_TR_REQ_CTL, 0, + RISCV_IOMMU_TR_REQ_CTL_GO_BUSY); + riscv_iommu_ctx_put(s, ref); +} + +typedef void riscv_iommu_process_fn(RISCVIOMMUState *s); + +static void riscv_iommu_update_icvec(RISCVIOMMUState *s, uint64_t data) +{ + uint64_t icvec = 0; + + icvec |= MIN(data & RISCV_IOMMU_ICVEC_CIV, + s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_CIV); + + icvec |= MIN(data & RISCV_IOMMU_ICVEC_FIV, + s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_FIV); + + icvec |= MIN(data & RISCV_IOMMU_ICVEC_PMIV, + s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PMIV); + + icvec |= MIN(data & RISCV_IOMMU_ICVEC_PIV, + s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PIV); + + trace_riscv_iommu_icvec_write(data, icvec); + + riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_ICVEC, icvec); +} + +static void riscv_iommu_update_ipsr(RISCVIOMMUState *s, uint64_t data) +{ + uint32_t cqcsr, fqcsr, pqcsr; + uint32_t ipsr_set = 0; + uint32_t ipsr_clr = 0; + + if (data & RISCV_IOMMU_IPSR_CIP) { + cqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR); + + if (cqcsr & RISCV_IOMMU_CQCSR_CIE && + (cqcsr & RISCV_IOMMU_CQCSR_FENCE_W_IP || + cqcsr & RISCV_IOMMU_CQCSR_CMD_ILL || + cqcsr & RISCV_IOMMU_CQCSR_CMD_TO || + cqcsr & RISCV_IOMMU_CQCSR_CQMF)) { + ipsr_set |= RISCV_IOMMU_IPSR_CIP; + } else { + ipsr_clr |= RISCV_IOMMU_IPSR_CIP; + } + } else { + ipsr_clr |= RISCV_IOMMU_IPSR_CIP; + } + + if (data & RISCV_IOMMU_IPSR_FIP) { + fqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR); + + if (fqcsr & RISCV_IOMMU_FQCSR_FIE && + (fqcsr & RISCV_IOMMU_FQCSR_FQOF || + fqcsr & RISCV_IOMMU_FQCSR_FQMF)) { + ipsr_set |= RISCV_IOMMU_IPSR_FIP; + } else { + ipsr_clr |= RISCV_IOMMU_IPSR_FIP; + } + } else { + ipsr_clr |= RISCV_IOMMU_IPSR_FIP; + } + + if (data & RISCV_IOMMU_IPSR_PIP) { + pqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR); + + if (pqcsr & RISCV_IOMMU_PQCSR_PIE && + (pqcsr & RISCV_IOMMU_PQCSR_PQOF || + pqcsr & RISCV_IOMMU_PQCSR_PQMF)) { + ipsr_set |= RISCV_IOMMU_IPSR_PIP; + } else { + ipsr_clr |= RISCV_IOMMU_IPSR_PIP; + } + } else { + ipsr_clr |= RISCV_IOMMU_IPSR_PIP; + } + + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, ipsr_set, ipsr_clr); +} + +static void riscv_iommu_process_hpm_writes(RISCVIOMMUState *s, + uint32_t regb, + bool prev_cy_inh) +{ + switch (regb) { + case RISCV_IOMMU_REG_IOCOUNTINH: + riscv_iommu_process_iocntinh_cy(s, prev_cy_inh); + break; + + case RISCV_IOMMU_REG_IOHPMCYCLES: + case RISCV_IOMMU_REG_IOHPMCYCLES + 4: + riscv_iommu_process_hpmcycle_write(s); + break; + + case RISCV_IOMMU_REG_IOHPMEVT_BASE ... + RISCV_IOMMU_REG_IOHPMEVT(RISCV_IOMMU_IOCOUNT_NUM) + 4: + riscv_iommu_process_hpmevt_write(s, regb & ~7); + break; + } +} + +/* + * Write the resulting value of 'data' for the reg specified + * by 'reg_addr', after considering read-only/read-write/write-clear + * bits, in the pointer 'dest'. + * + * The result is written in little-endian. + */ +static void riscv_iommu_write_reg_val(RISCVIOMMUState *s, + void *dest, hwaddr reg_addr, + int size, uint64_t data) +{ + uint64_t ro = ldn_le_p(&s->regs_ro[reg_addr], size); + uint64_t wc = ldn_le_p(&s->regs_wc[reg_addr], size); + uint64_t rw = ldn_le_p(&s->regs_rw[reg_addr], size); + + stn_le_p(dest, size, ((rw & ro) | (data & ~ro)) & ~(data & wc)); +} + +static MemTxResult riscv_iommu_mmio_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size, + MemTxAttrs attrs) +{ + riscv_iommu_process_fn *process_fn = NULL; + RISCVIOMMUState *s = opaque; + uint32_t regb = addr & ~3; + uint32_t busy = 0; + uint64_t val = 0; + bool cy_inh = false; + + if ((addr & (size - 1)) != 0) { + /* Unsupported MMIO alignment or access size */ + return MEMTX_ERROR; + } + + if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) { + /* Unsupported MMIO access location. */ + return MEMTX_ACCESS_ERROR; + } + + /* Track actionable MMIO write. */ + switch (regb) { + case RISCV_IOMMU_REG_DDTP: + case RISCV_IOMMU_REG_DDTP + 4: + process_fn = riscv_iommu_process_ddtp; + regb = RISCV_IOMMU_REG_DDTP; + busy = RISCV_IOMMU_DDTP_BUSY; + break; + + case RISCV_IOMMU_REG_CQT: + process_fn = riscv_iommu_process_cq_tail; + break; + + case RISCV_IOMMU_REG_CQCSR: + process_fn = riscv_iommu_process_cq_control; + busy = RISCV_IOMMU_CQCSR_BUSY; + break; + + case RISCV_IOMMU_REG_FQCSR: + process_fn = riscv_iommu_process_fq_control; + busy = RISCV_IOMMU_FQCSR_BUSY; + break; + + case RISCV_IOMMU_REG_PQCSR: + process_fn = riscv_iommu_process_pq_control; + busy = RISCV_IOMMU_PQCSR_BUSY; + break; + + case RISCV_IOMMU_REG_ICVEC: + case RISCV_IOMMU_REG_IPSR: + /* + * ICVEC and IPSR have special read/write procedures. We'll + * call their respective helpers and exit. + */ + riscv_iommu_write_reg_val(s, &val, addr, size, data); + + /* + * 'val' is stored as LE. Switch to host endianess + * before using it. + */ + val = le64_to_cpu(val); + + if (regb == RISCV_IOMMU_REG_ICVEC) { + riscv_iommu_update_icvec(s, val); + } else { + riscv_iommu_update_ipsr(s, val); + } + + return MEMTX_OK; + + case RISCV_IOMMU_REG_TR_REQ_CTL: + process_fn = riscv_iommu_process_dbg; + regb = RISCV_IOMMU_REG_TR_REQ_CTL; + busy = RISCV_IOMMU_TR_REQ_CTL_GO_BUSY; + break; + + case RISCV_IOMMU_REG_IOCOUNTINH: + if (addr != RISCV_IOMMU_REG_IOCOUNTINH) { + break; + } + /* Store previous value of CY bit. */ + cy_inh = !!(riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_IOCOUNTINH) & + RISCV_IOMMU_IOCOUNTINH_CY); + break; + + + default: + break; + } + + /* + * Registers update might be not synchronized with core logic. + * If system software updates register when relevant BUSY bit + * is set IOMMU behavior of additional writes to the register + * is UNSPECIFIED. + */ + riscv_iommu_write_reg_val(s, &s->regs_rw[addr], addr, size, data); + + /* Busy flag update, MSB 4-byte register. */ + if (busy) { + uint32_t rw = ldl_le_p(&s->regs_rw[regb]); + stl_le_p(&s->regs_rw[regb], rw | busy); + } + + /* Process HPM writes and update any internal state if needed. */ + if (regb >= RISCV_IOMMU_REG_IOCOUNTOVF && + regb <= (RISCV_IOMMU_REG_IOHPMEVT(RISCV_IOMMU_IOCOUNT_NUM) + 4)) { + riscv_iommu_process_hpm_writes(s, regb, cy_inh); + } + + if (process_fn) { + process_fn(s); + } + + return MEMTX_OK; +} + +static MemTxResult riscv_iommu_mmio_read(void *opaque, hwaddr addr, + uint64_t *data, unsigned size, MemTxAttrs attrs) +{ + RISCVIOMMUState *s = opaque; + uint64_t val = -1; + uint8_t *ptr; + + if ((addr & (size - 1)) != 0) { + /* Unsupported MMIO alignment. */ + return MEMTX_ERROR; + } + + if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) { + return MEMTX_ACCESS_ERROR; + } + + /* Compute cycle register value. */ + if ((addr & ~7) == RISCV_IOMMU_REG_IOHPMCYCLES) { + val = riscv_iommu_hpmcycle_read(s); + ptr = (uint8_t *)&val + (addr & 7); + } else if ((addr & ~3) == RISCV_IOMMU_REG_IOCOUNTOVF) { + /* + * Software can read RISCV_IOMMU_REG_IOCOUNTOVF before timer + * callback completes. In which case CY_OF bit in + * RISCV_IOMMU_IOHPMCYCLES_OVF would be 0. Here we take the + * CY_OF bit state from RISCV_IOMMU_REG_IOHPMCYCLES register as + * it's not dependent over the timer callback and is computed + * from cycle overflow. + */ + val = ldq_le_p(&s->regs_rw[addr]); + val |= (riscv_iommu_hpmcycle_read(s) & RISCV_IOMMU_IOHPMCYCLES_OVF) + ? RISCV_IOMMU_IOCOUNTOVF_CY + : 0; + ptr = (uint8_t *)&val + (addr & 3); + } else { + ptr = &s->regs_rw[addr]; + } + + val = ldn_le_p(ptr, size); + + *data = val; + + return MEMTX_OK; +} + +static const MemoryRegionOps riscv_iommu_mmio_ops = { + .read_with_attrs = riscv_iommu_mmio_read, + .write_with_attrs = riscv_iommu_mmio_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 8, + .unaligned = false, + }, + .valid = { + .min_access_size = 4, + .max_access_size = 8, + } +}; + +/* + * Translations matching MSI pattern check are redirected to "riscv-iommu-trap" + * memory region as untranslated address, for additional MSI/MRIF interception + * by IOMMU interrupt remapping implementation. + * Note: Device emulation code generating an MSI is expected to provide a valid + * memory transaction attributes with requested_id set. + */ +static MemTxResult riscv_iommu_trap_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size, MemTxAttrs attrs) +{ + RISCVIOMMUState* s = (RISCVIOMMUState *)opaque; + RISCVIOMMUContext *ctx; + MemTxResult res; + void *ref; + uint32_t devid = attrs.requester_id; + + if (attrs.unspecified) { + return MEMTX_ACCESS_ERROR; + } + + /* FIXME: PCIe bus remapping for attached endpoints. */ + devid |= s->bus << 8; + + ctx = riscv_iommu_ctx(s, devid, 0, &ref); + if (ctx == NULL) { + res = MEMTX_ACCESS_ERROR; + } else { + res = riscv_iommu_msi_write(s, ctx, addr, data, size, attrs); + } + riscv_iommu_ctx_put(s, ref); + return res; +} + +static MemTxResult riscv_iommu_trap_read(void *opaque, hwaddr addr, + uint64_t *data, unsigned size, MemTxAttrs attrs) +{ + return MEMTX_ACCESS_ERROR; +} + +static const MemoryRegionOps riscv_iommu_trap_ops = { + .read_with_attrs = riscv_iommu_trap_read, + .write_with_attrs = riscv_iommu_trap_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 8, + .unaligned = true, + }, + .valid = { + .min_access_size = 4, + .max_access_size = 8, + } +}; + +void riscv_iommu_set_cap_igs(RISCVIOMMUState *s, riscv_iommu_igs_mode mode) +{ + s->cap = set_field(s->cap, RISCV_IOMMU_CAP_IGS, mode); +} + +static void riscv_iommu_instance_init(Object *obj) +{ + RISCVIOMMUState *s = RISCV_IOMMU(obj); + + /* Enable translation debug interface */ + s->cap = RISCV_IOMMU_CAP_DBG; + + /* Report QEMU target physical address space limits */ + s->cap = set_field(s->cap, RISCV_IOMMU_CAP_PAS, + TARGET_PHYS_ADDR_SPACE_BITS); + + /* TODO: method to report supported PID bits */ + s->pid_bits = 8; /* restricted to size of MemTxAttrs.pid */ + s->cap |= RISCV_IOMMU_CAP_PD8; + + /* register storage */ + s->regs_rw = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE); + s->regs_ro = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE); + s->regs_wc = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE); + + /* Mark all registers read-only */ + memset(s->regs_ro, 0xff, RISCV_IOMMU_REG_SIZE); + + /* Device translation context cache */ + s->ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash, + riscv_iommu_ctx_equal, + g_free, NULL); + + s->iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash, + riscv_iommu_iot_equal, + g_free, NULL); + + s->iommus.le_next = NULL; + s->iommus.le_prev = NULL; + QLIST_INIT(&s->spaces); +} + +static void riscv_iommu_realize(DeviceState *dev, Error **errp) +{ + RISCVIOMMUState *s = RISCV_IOMMU(dev); + + s->cap |= s->version & RISCV_IOMMU_CAP_VERSION; + if (s->enable_msi) { + s->cap |= RISCV_IOMMU_CAP_MSI_FLAT | RISCV_IOMMU_CAP_MSI_MRIF; + } + if (s->enable_ats) { + s->cap |= RISCV_IOMMU_CAP_ATS; + } + if (s->enable_s_stage) { + s->cap |= RISCV_IOMMU_CAP_SV32 | RISCV_IOMMU_CAP_SV39 | + RISCV_IOMMU_CAP_SV48 | RISCV_IOMMU_CAP_SV57; + } + if (s->enable_g_stage) { + s->cap |= RISCV_IOMMU_CAP_SV32X4 | RISCV_IOMMU_CAP_SV39X4 | + RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4; + } + + if (s->hpm_cntrs > 0) { + /* Clip number of HPM counters to maximum supported (31). */ + if (s->hpm_cntrs > RISCV_IOMMU_IOCOUNT_NUM) { + s->hpm_cntrs = RISCV_IOMMU_IOCOUNT_NUM; + } + /* Enable hardware performance monitor interface */ + s->cap |= RISCV_IOMMU_CAP_HPM; + } + + /* Out-of-reset translation mode: OFF (DMA disabled) BARE (passthrough) */ + s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, s->enable_off ? + RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE); + + /* + * Register complete MMIO space, including MSI/PBA registers. + * Note, PCIDevice implementation will add overlapping MR for MSI/PBA, + * managed directly by the PCIDevice implementation. + */ + memory_region_init_io(&s->regs_mr, OBJECT(dev), &riscv_iommu_mmio_ops, s, + "riscv-iommu-regs", RISCV_IOMMU_REG_SIZE); + + /* Set power-on register state */ + stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_CAP], s->cap); + stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_FCTL], 0); + stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FCTL], + ~(RISCV_IOMMU_FCTL_BE | RISCV_IOMMU_FCTL_WSI)); + stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_DDTP], + ~(RISCV_IOMMU_DDTP_PPN | RISCV_IOMMU_DDTP_MODE)); + stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQB], + ~(RISCV_IOMMU_CQB_LOG2SZ | RISCV_IOMMU_CQB_PPN)); + stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQB], + ~(RISCV_IOMMU_FQB_LOG2SZ | RISCV_IOMMU_FQB_PPN)); + stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQB], + ~(RISCV_IOMMU_PQB_LOG2SZ | RISCV_IOMMU_PQB_PPN)); + stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQMF | + RISCV_IOMMU_CQCSR_CMD_TO | RISCV_IOMMU_CQCSR_CMD_ILL); + stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQON | + RISCV_IOMMU_CQCSR_BUSY); + stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQMF | + RISCV_IOMMU_FQCSR_FQOF); + stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQON | + RISCV_IOMMU_FQCSR_BUSY); + stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQMF | + RISCV_IOMMU_PQCSR_PQOF); + stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQON | + RISCV_IOMMU_PQCSR_BUSY); + stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_IPSR], ~0); + stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_ICVEC], 0); + stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_DDTP], s->ddtp); + /* If debug registers enabled. */ + if (s->cap & RISCV_IOMMU_CAP_DBG) { + stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_IOVA], 0); + stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_CTL], + RISCV_IOMMU_TR_REQ_CTL_GO_BUSY); + } + + /* If HPM registers are enabled. */ + if (s->cap & RISCV_IOMMU_CAP_HPM) { + /* +1 for cycle counter bit. */ + stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_IOCOUNTINH], + ~((2 << s->hpm_cntrs) - 1)); + stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_IOHPMCYCLES], 0); + memset(&s->regs_ro[RISCV_IOMMU_REG_IOHPMCTR_BASE], + 0x00, s->hpm_cntrs * 8); + memset(&s->regs_ro[RISCV_IOMMU_REG_IOHPMEVT_BASE], + 0x00, s->hpm_cntrs * 8); + } + + /* Memory region for downstream access, if specified. */ + if (s->target_mr) { + s->target_as = g_new0(AddressSpace, 1); + address_space_init(s->target_as, s->target_mr, + "riscv-iommu-downstream"); + } else { + /* Fallback to global system memory. */ + s->target_as = &address_space_memory; + } + + /* Memory region for untranslated MRIF/MSI writes */ + memory_region_init_io(&s->trap_mr, OBJECT(dev), &riscv_iommu_trap_ops, s, + "riscv-iommu-trap", ~0ULL); + address_space_init(&s->trap_as, &s->trap_mr, "riscv-iommu-trap-as"); + + if (s->cap & RISCV_IOMMU_CAP_HPM) { + s->hpm_timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, riscv_iommu_hpm_timer_cb, s); + s->hpm_event_ctr_map = g_hash_table_new(g_direct_hash, g_direct_equal); + } +} + +static void riscv_iommu_unrealize(DeviceState *dev) +{ + RISCVIOMMUState *s = RISCV_IOMMU(dev); + + g_hash_table_unref(s->iot_cache); + g_hash_table_unref(s->ctx_cache); + + if (s->cap & RISCV_IOMMU_CAP_HPM) { + g_hash_table_unref(s->hpm_event_ctr_map); + timer_free(s->hpm_timer); + } +} + +void riscv_iommu_reset(RISCVIOMMUState *s) +{ + uint32_t reg_clr; + int ddtp_mode; + + /* + * Clear DDTP while setting DDTP_mode back to user + * initial setting. + */ + ddtp_mode = s->enable_off ? + RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE; + s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, ddtp_mode); + riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, s->ddtp); + + reg_clr = RISCV_IOMMU_CQCSR_CQEN | RISCV_IOMMU_CQCSR_CIE | + RISCV_IOMMU_CQCSR_CQON | RISCV_IOMMU_CQCSR_BUSY; + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, 0, reg_clr); + + reg_clr = RISCV_IOMMU_FQCSR_FQEN | RISCV_IOMMU_FQCSR_FIE | + RISCV_IOMMU_FQCSR_FQON | RISCV_IOMMU_FQCSR_BUSY; + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, 0, reg_clr); + + reg_clr = RISCV_IOMMU_PQCSR_PQEN | RISCV_IOMMU_PQCSR_PIE | + RISCV_IOMMU_PQCSR_PQON | RISCV_IOMMU_PQCSR_BUSY; + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, 0, reg_clr); + + riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_TR_REQ_CTL, 0, + RISCV_IOMMU_TR_REQ_CTL_GO_BUSY); + + riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_IPSR, 0); + + g_hash_table_remove_all(s->ctx_cache); + g_hash_table_remove_all(s->iot_cache); +} + +static const Property riscv_iommu_properties[] = { + DEFINE_PROP_UINT32("version", RISCVIOMMUState, version, + RISCV_IOMMU_SPEC_DOT_VER), + DEFINE_PROP_UINT32("bus", RISCVIOMMUState, bus, 0x0), + DEFINE_PROP_UINT32("ioatc-limit", RISCVIOMMUState, iot_limit, + LIMIT_CACHE_IOT), + DEFINE_PROP_BOOL("intremap", RISCVIOMMUState, enable_msi, TRUE), + DEFINE_PROP_BOOL("ats", RISCVIOMMUState, enable_ats, TRUE), + DEFINE_PROP_BOOL("off", RISCVIOMMUState, enable_off, TRUE), + DEFINE_PROP_BOOL("s-stage", RISCVIOMMUState, enable_s_stage, TRUE), + DEFINE_PROP_BOOL("g-stage", RISCVIOMMUState, enable_g_stage, TRUE), + DEFINE_PROP_LINK("downstream-mr", RISCVIOMMUState, target_mr, + TYPE_MEMORY_REGION, MemoryRegion *), + DEFINE_PROP_UINT8("hpm-counters", RISCVIOMMUState, hpm_cntrs, + RISCV_IOMMU_IOCOUNT_NUM), +}; + +static void riscv_iommu_class_init(ObjectClass *klass, const void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + /* internal device for riscv-iommu-{pci/sys}, not user-creatable */ + dc->user_creatable = false; + dc->realize = riscv_iommu_realize; + dc->unrealize = riscv_iommu_unrealize; + device_class_set_props(dc, riscv_iommu_properties); +} + +static const TypeInfo riscv_iommu_info = { + .name = TYPE_RISCV_IOMMU, + .parent = TYPE_DEVICE, + .instance_size = sizeof(RISCVIOMMUState), + .instance_init = riscv_iommu_instance_init, + .class_init = riscv_iommu_class_init, +}; + +static const char *IOMMU_FLAG_STR[] = { + "NA", + "RO", + "WR", + "RW", +}; + +/* RISC-V IOMMU Memory Region - Address Translation Space */ +static IOMMUTLBEntry riscv_iommu_memory_region_translate( + IOMMUMemoryRegion *iommu_mr, hwaddr addr, + IOMMUAccessFlags flag, int iommu_idx) +{ + RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr); + RISCVIOMMUContext *ctx; + void *ref; + IOMMUTLBEntry iotlb = { + .iova = addr, + .target_as = as->iommu->target_as, + .addr_mask = ~0ULL, + .perm = flag, + }; + + ctx = riscv_iommu_ctx(as->iommu, as->devid, iommu_idx, &ref); + if (ctx == NULL) { + /* Translation disabled or invalid. */ + iotlb.addr_mask = 0; + iotlb.perm = IOMMU_NONE; + } else if (riscv_iommu_translate(as->iommu, ctx, &iotlb, true)) { + /* Translation disabled or fault reported. */ + iotlb.addr_mask = 0; + iotlb.perm = IOMMU_NONE; + } + + /* Trace all dma translations with original access flags. */ + trace_riscv_iommu_dma(as->iommu->parent_obj.id, PCI_BUS_NUM(as->devid), + PCI_SLOT(as->devid), PCI_FUNC(as->devid), iommu_idx, + IOMMU_FLAG_STR[flag & IOMMU_RW], iotlb.iova, + iotlb.translated_addr); + + riscv_iommu_ctx_put(as->iommu, ref); + + return iotlb; +} + +static int riscv_iommu_memory_region_notify( + IOMMUMemoryRegion *iommu_mr, IOMMUNotifierFlag old, + IOMMUNotifierFlag new, Error **errp) +{ + RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr); + + if (old == IOMMU_NOTIFIER_NONE) { + as->notifier = true; + trace_riscv_iommu_notifier_add(iommu_mr->parent_obj.name); + } else if (new == IOMMU_NOTIFIER_NONE) { + as->notifier = false; + trace_riscv_iommu_notifier_del(iommu_mr->parent_obj.name); + } + + return 0; +} + +static inline bool pci_is_iommu(PCIDevice *pdev) +{ + return pci_get_word(pdev->config + PCI_CLASS_DEVICE) == 0x0806; +} + +static AddressSpace *riscv_iommu_find_as(PCIBus *bus, void *opaque, int devfn) +{ + RISCVIOMMUState *s = (RISCVIOMMUState *) opaque; + PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus), devfn); + AddressSpace *as = NULL; + + if (pdev && pci_is_iommu(pdev)) { + return s->target_as; + } + + /* Find first registered IOMMU device */ + while (s->iommus.le_prev) { + s = *(s->iommus.le_prev); + } + + /* Find first matching IOMMU */ + while (s != NULL && as == NULL) { + as = riscv_iommu_space(s, PCI_BUILD_BDF(pci_bus_num(bus), devfn)); + s = s->iommus.le_next; + } + + return as ? as : &address_space_memory; +} + +static const PCIIOMMUOps riscv_iommu_ops = { + .get_address_space = riscv_iommu_find_as, +}; + +void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus, + Error **errp) +{ + if (bus->iommu_ops && + bus->iommu_ops->get_address_space == riscv_iommu_find_as) { + /* Allow multiple IOMMUs on the same PCIe bus, link known devices */ + RISCVIOMMUState *last = (RISCVIOMMUState *)bus->iommu_opaque; + QLIST_INSERT_AFTER(last, iommu, iommus); + } else if (!bus->iommu_ops && !bus->iommu_opaque) { + pci_setup_iommu(bus, &riscv_iommu_ops, iommu); + } else { + error_setg(errp, "can't register secondary IOMMU for PCI bus #%d", + pci_bus_num(bus)); + } +} + +static int riscv_iommu_memory_region_index(IOMMUMemoryRegion *iommu_mr, + MemTxAttrs attrs) +{ + return attrs.unspecified ? RISCV_IOMMU_NOPROCID : (int)attrs.pid; +} + +static int riscv_iommu_memory_region_index_len(IOMMUMemoryRegion *iommu_mr) +{ + RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr); + return 1 << as->iommu->pid_bits; +} + +static void riscv_iommu_memory_region_init(ObjectClass *klass, const void *data) +{ + IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass); + + imrc->translate = riscv_iommu_memory_region_translate; + imrc->notify_flag_changed = riscv_iommu_memory_region_notify; + imrc->attrs_to_index = riscv_iommu_memory_region_index; + imrc->num_indexes = riscv_iommu_memory_region_index_len; +} + +static const TypeInfo riscv_iommu_memory_region_info = { + .parent = TYPE_IOMMU_MEMORY_REGION, + .name = TYPE_RISCV_IOMMU_MEMORY_REGION, + .class_init = riscv_iommu_memory_region_init, +}; + +static void riscv_iommu_register_mr_types(void) +{ + type_register_static(&riscv_iommu_memory_region_info); + type_register_static(&riscv_iommu_info); +} + +type_init(riscv_iommu_register_mr_types); diff --git a/hw/riscv/riscv-iommu.h b/hw/riscv/riscv-iommu.h new file mode 100644 index 0000000..a31aa62 --- /dev/null +++ b/hw/riscv/riscv-iommu.h @@ -0,0 +1,157 @@ +/* + * QEMU emulation of an RISC-V IOMMU + * + * Copyright (C) 2022-2023 Rivos Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef HW_RISCV_IOMMU_STATE_H +#define HW_RISCV_IOMMU_STATE_H + +#include "qom/object.h" +#include "hw/qdev-properties.h" +#include "system/dma.h" +#include "hw/riscv/iommu.h" +#include "hw/riscv/riscv-iommu-bits.h" + +typedef enum riscv_iommu_igs_modes riscv_iommu_igs_mode; + +struct RISCVIOMMUState { + /*< private >*/ + DeviceState parent_obj; + + /*< public >*/ + uint32_t version; /* Reported interface version number */ + uint32_t pid_bits; /* process identifier width */ + uint32_t bus; /* PCI bus mapping for non-root endpoints */ + + uint64_t cap; /* IOMMU supported capabilities */ + uint64_t fctl; /* IOMMU enabled features */ + uint64_t icvec_avail_vectors; /* Available interrupt vectors in ICVEC */ + + bool enable_off; /* Enable out-of-reset OFF mode (DMA disabled) */ + bool enable_msi; /* Enable MSI remapping */ + bool enable_ats; /* Enable ATS support */ + bool enable_s_stage; /* Enable S/VS-Stage translation */ + bool enable_g_stage; /* Enable G-Stage translation */ + + /* IOMMU Internal State */ + uint64_t ddtp; /* Validated Device Directory Tree Root Pointer */ + + dma_addr_t cq_addr; /* Command queue base physical address */ + dma_addr_t fq_addr; /* Fault/event queue base physical address */ + dma_addr_t pq_addr; /* Page request queue base physical address */ + + uint32_t cq_mask; /* Command queue index bit mask */ + uint32_t fq_mask; /* Fault/event queue index bit mask */ + uint32_t pq_mask; /* Page request queue index bit mask */ + + /* interrupt notifier */ + void (*notify)(RISCVIOMMUState *iommu, unsigned vector); + + /* IOMMU target address space */ + AddressSpace *target_as; + MemoryRegion *target_mr; + + /* MSI / MRIF access trap */ + AddressSpace trap_as; + MemoryRegion trap_mr; + + GHashTable *ctx_cache; /* Device translation Context Cache */ + + GHashTable *iot_cache; /* IO Translated Address Cache */ + unsigned iot_limit; /* IO Translation Cache size limit */ + + /* MMIO Hardware Interface */ + MemoryRegion regs_mr; + uint8_t *regs_rw; /* register state (user write) */ + uint8_t *regs_wc; /* write-1-to-clear mask */ + uint8_t *regs_ro; /* read-only mask */ + + QLIST_ENTRY(RISCVIOMMUState) iommus; + QLIST_HEAD(, RISCVIOMMUSpace) spaces; + + /* HPM cycle counter */ + QEMUTimer *hpm_timer; + uint64_t hpmcycle_val; /* Current value of cycle register */ + uint64_t hpmcycle_prev; /* Saved value of QEMU_CLOCK_VIRTUAL clock */ + uint64_t irq_overflow_left; /* Value beyond INT64_MAX after overflow */ + + /* HPM event counters */ + GHashTable *hpm_event_ctr_map; /* Mapping of events to counters */ + uint8_t hpm_cntrs; +}; + +void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus, + Error **errp); +void riscv_iommu_set_cap_igs(RISCVIOMMUState *s, riscv_iommu_igs_mode mode); +void riscv_iommu_reset(RISCVIOMMUState *s); +void riscv_iommu_notify(RISCVIOMMUState *s, int vec_type); + +typedef struct RISCVIOMMUContext RISCVIOMMUContext; +/* Device translation context state. */ +struct RISCVIOMMUContext { + uint64_t devid:24; /* Requester Id, AKA device_id */ + uint64_t process_id:20; /* Process ID. PASID for PCIe */ + uint64_t tc; /* Translation Control */ + uint64_t ta; /* Translation Attributes */ + uint64_t satp; /* S-Stage address translation and protection */ + uint64_t gatp; /* G-Stage address translation and protection */ + uint64_t msi_addr_mask; /* MSI filtering - address mask */ + uint64_t msi_addr_pattern; /* MSI filtering - address pattern */ + uint64_t msiptp; /* MSI redirection page table pointer */ +}; + +/* private helpers */ + +/* Register helper functions */ +static inline uint32_t riscv_iommu_reg_mod32(RISCVIOMMUState *s, + unsigned idx, uint32_t set, uint32_t clr) +{ + uint32_t val = ldl_le_p(s->regs_rw + idx); + stl_le_p(s->regs_rw + idx, (val & ~clr) | set); + return val; +} + +static inline void riscv_iommu_reg_set32(RISCVIOMMUState *s, unsigned idx, + uint32_t set) +{ + stl_le_p(s->regs_rw + idx, set); +} + +static inline uint32_t riscv_iommu_reg_get32(RISCVIOMMUState *s, unsigned idx) +{ + return ldl_le_p(s->regs_rw + idx); +} + +static inline uint64_t riscv_iommu_reg_mod64(RISCVIOMMUState *s, unsigned idx, + uint64_t set, uint64_t clr) +{ + uint64_t val = ldq_le_p(s->regs_rw + idx); + stq_le_p(s->regs_rw + idx, (val & ~clr) | set); + return val; +} + +static inline void riscv_iommu_reg_set64(RISCVIOMMUState *s, unsigned idx, + uint64_t set) +{ + stq_le_p(s->regs_rw + idx, set); +} + +static inline uint64_t riscv_iommu_reg_get64(RISCVIOMMUState *s, + unsigned idx) +{ + return ldq_le_p(s->regs_rw + idx); +} +#endif diff --git a/hw/riscv/riscv_hart.c b/hw/riscv/riscv_hart.c index 613ea2a..7f26760 100644 --- a/hw/riscv/riscv_hart.c +++ b/hw/riscv/riscv_hart.c @@ -21,19 +21,38 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu/module.h" -#include "sysemu/reset.h" +#include "system/reset.h" +#include "system/qtest.h" +#include "qemu/cutils.h" #include "hw/sysbus.h" #include "target/riscv/cpu.h" #include "hw/qdev-properties.h" #include "hw/riscv/riscv_hart.h" +#include "qemu/error-report.h" -static Property riscv_harts_props[] = { +static const Property riscv_harts_props[] = { DEFINE_PROP_UINT32("num-harts", RISCVHartArrayState, num_harts, 1), DEFINE_PROP_UINT32("hartid-base", RISCVHartArrayState, hartid_base, 0), DEFINE_PROP_STRING("cpu-type", RISCVHartArrayState, cpu_type), DEFINE_PROP_UINT64("resetvec", RISCVHartArrayState, resetvec, DEFAULT_RSTVEC), - DEFINE_PROP_END_OF_LIST(), + + /* + * Smrnmi implementation-defined interrupt and exception trap handlers. + * + * When an RNMI interrupt is detected, the hart then enters M-mode and + * jumps to the address defined by "rnmi-interrupt-vector". + * + * When the hart encounters an exception while executing in M-mode with + * the mnstatus.NMIE bit clear, the hart then jumps to the address + * defined by "rnmi-exception-vector". + */ + DEFINE_PROP_ARRAY("rnmi-interrupt-vector", RISCVHartArrayState, + num_rnmi_irqvec, rnmi_irqvec, qdev_prop_uint64, + uint64_t), + DEFINE_PROP_ARRAY("rnmi-exception-vector", RISCVHartArrayState, + num_rnmi_excpvec, rnmi_excpvec, qdev_prop_uint64, + uint64_t), }; static void riscv_harts_cpu_reset(void *opaque) @@ -42,11 +61,85 @@ static void riscv_harts_cpu_reset(void *opaque) cpu_reset(CPU(cpu)); } +#ifndef CONFIG_USER_ONLY +static void csr_call(char *cmd, uint64_t cpu_num, int csrno, uint64_t *val) +{ + RISCVCPU *cpu = RISCV_CPU(cpu_by_arch_id(cpu_num)); + CPURISCVState *env = &cpu->env; + + int ret = RISCV_EXCP_NONE; + if (strcmp(cmd, "get_csr") == 0) { + ret = riscv_csrr(env, csrno, (target_ulong *)val); + } else if (strcmp(cmd, "set_csr") == 0) { + ret = riscv_csrrw(env, csrno, NULL, *(target_ulong *)val, + MAKE_64BIT_MASK(0, TARGET_LONG_BITS), 0); + } + + g_assert(ret == RISCV_EXCP_NONE); +} + +static bool csr_qtest_callback(CharBackend *chr, gchar **words) +{ + if (strcmp(words[0], "csr") == 0) { + + uint64_t cpu; + uint64_t val; + int rc, csr; + + rc = qemu_strtou64(words[2], NULL, 0, &cpu); + g_assert(rc == 0); + rc = qemu_strtoi(words[3], NULL, 0, &csr); + g_assert(rc == 0); + rc = qemu_strtou64(words[4], NULL, 0, &val); + g_assert(rc == 0); + csr_call(words[1], cpu, csr, &val); + + qtest_sendf(chr, "OK 0 "TARGET_FMT_lx"\n", (target_ulong)val); + + return true; + } + + return false; +} + +static void riscv_cpu_register_csr_qtest_callback(void) +{ + static bool first = true; + if (first) { + first = false; + qtest_set_command_cb(csr_qtest_callback); + } +} +#endif + static bool riscv_hart_realize(RISCVHartArrayState *s, int idx, char *cpu_type, Error **errp) { object_initialize_child(OBJECT(s), "harts[*]", &s->harts[idx], cpu_type); qdev_prop_set_uint64(DEVICE(&s->harts[idx]), "resetvec", s->resetvec); + + if (s->harts[idx].cfg.ext_smrnmi) { + if (idx < s->num_rnmi_irqvec) { + qdev_prop_set_uint64(DEVICE(&s->harts[idx]), + "rnmi-interrupt-vector", s->rnmi_irqvec[idx]); + } + + if (idx < s->num_rnmi_excpvec) { + qdev_prop_set_uint64(DEVICE(&s->harts[idx]), + "rnmi-exception-vector", s->rnmi_excpvec[idx]); + } + } else { + if (s->num_rnmi_irqvec > 0) { + warn_report_once("rnmi-interrupt-vector property is ignored " + "because Smrnmi extension is not enabled."); + } + + if (s->num_rnmi_excpvec > 0) { + warn_report_once("rnmi-exception-vector property is ignored " + "because Smrnmi extension is not enabled."); + } + } + s->harts[idx].env.mhartid = s->hartid_base + idx; qemu_register_reset(riscv_harts_cpu_reset, &s->harts[idx]); return qdev_realize(DEVICE(&s->harts[idx]), NULL, errp); @@ -59,6 +152,10 @@ static void riscv_harts_realize(DeviceState *dev, Error **errp) s->harts = g_new0(RISCVCPU, s->num_harts); +#ifndef CONFIG_USER_ONLY + riscv_cpu_register_csr_qtest_callback(); +#endif + for (n = 0; n < s->num_harts; n++) { if (!riscv_hart_realize(s, n, s->cpu_type, errp)) { return; @@ -66,7 +163,7 @@ static void riscv_harts_realize(DeviceState *dev, Error **errp) } } -static void riscv_harts_class_init(ObjectClass *klass, void *data) +static void riscv_harts_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); diff --git a/hw/riscv/shakti_c.c b/hw/riscv/shakti_c.c index 3888034..3e7f441 100644 --- a/hw/riscv/shakti_c.c +++ b/hw/riscv/shakti_c.c @@ -23,9 +23,9 @@ #include "qemu/error-report.h" #include "hw/intc/sifive_plic.h" #include "hw/intc/riscv_aclint.h" -#include "sysemu/sysemu.h" +#include "system/system.h" #include "hw/qdev-properties.h" -#include "exec/address-spaces.h" +#include "system/address-spaces.h" #include "hw/riscv/boot.h" static const struct MemmapEntry { @@ -45,6 +45,7 @@ static void shakti_c_machine_state_init(MachineState *mstate) { ShaktiCMachineState *sms = RISCV_SHAKTI_MACHINE(mstate); MemoryRegion *system_memory = get_system_memory(); + hwaddr firmware_load_addr = shakti_c_memmap[SHAKTI_C_RAM].base; /* Initialize SoC */ object_initialize_child(OBJECT(mstate), "soc", &sms->soc, @@ -56,23 +57,21 @@ static void shakti_c_machine_state_init(MachineState *mstate) shakti_c_memmap[SHAKTI_C_RAM].base, mstate->ram); + if (mstate->firmware) { + riscv_load_firmware(mstate->firmware, &firmware_load_addr, NULL); + } + /* ROM reset vector */ - riscv_setup_rom_reset_vec(mstate, &sms->soc.cpus, - shakti_c_memmap[SHAKTI_C_RAM].base, + riscv_setup_rom_reset_vec(mstate, &sms->soc.cpus, firmware_load_addr, shakti_c_memmap[SHAKTI_C_ROM].base, shakti_c_memmap[SHAKTI_C_ROM].size, 0, 0); - if (mstate->firmware) { - riscv_load_firmware(mstate->firmware, - shakti_c_memmap[SHAKTI_C_RAM].base, - NULL); - } } static void shakti_c_machine_instance_init(Object *obj) { } -static void shakti_c_machine_class_init(ObjectClass *klass, void *data) +static void shakti_c_machine_class_init(ObjectClass *klass, const void *data) { MachineClass *mc = MACHINE_CLASS(klass); static const char * const valid_cpu_types[] = { @@ -143,7 +142,7 @@ static void shakti_c_soc_state_realize(DeviceState *dev, Error **errp) shakti_c_memmap[SHAKTI_C_ROM].base, &sss->rom); } -static void shakti_c_soc_class_init(ObjectClass *klass, void *data) +static void shakti_c_soc_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); dc->realize = shakti_c_soc_state_realize; diff --git a/hw/riscv/sifive_e.c b/hw/riscv/sifive_e.c index 87d9602..7baed19 100644 --- a/hw/riscv/sifive_e.c +++ b/hw/riscv/sifive_e.c @@ -35,7 +35,6 @@ #include "hw/boards.h" #include "hw/loader.h" #include "hw/sysbus.h" -#include "hw/char/serial.h" #include "hw/misc/unimp.h" #include "target/riscv/cpu.h" #include "hw/riscv/riscv_hart.h" @@ -47,7 +46,7 @@ #include "hw/misc/sifive_e_prci.h" #include "hw/misc/sifive_e_aon.h" #include "chardev/char.h" -#include "sysemu/sysemu.h" +#include "system/system.h" static const MemMapEntry sifive_e_memmap[] = { [SIFIVE_E_DEV_DEBUG] = { 0x0, 0x1000 }, @@ -79,6 +78,7 @@ static void sifive_e_machine_init(MachineState *machine) SiFiveEState *s = RISCV_E_MACHINE(machine); MemoryRegion *sys_mem = get_system_memory(); int i; + RISCVBootInfo boot_info; if (machine->ram_size != mc->default_ram_size) { char *sz = size_to_str(mc->default_ram_size); @@ -114,8 +114,9 @@ static void sifive_e_machine_init(MachineState *machine) rom_add_blob_fixed_as("mrom.reset", reset_vec, sizeof(reset_vec), memmap[SIFIVE_E_DEV_MROM].base, &address_space_memory); + riscv_boot_info_init(&boot_info, &s->soc.cpus); if (machine->kernel_filename) { - riscv_load_kernel(machine, &s->soc.cpus, + riscv_load_kernel(machine, &boot_info, memmap[SIFIVE_E_DEV_DTIM].base, false, NULL); } @@ -142,7 +143,7 @@ static void sifive_e_machine_instance_init(Object *obj) s->revb = false; } -static void sifive_e_machine_class_init(ObjectClass *oc, void *data) +static void sifive_e_machine_class_init(ObjectClass *oc, const void *data) { MachineClass *mc = MACHINE_CLASS(oc); @@ -283,7 +284,7 @@ static void sifive_e_soc_realize(DeviceState *dev, Error **errp) &s->xip_mem); } -static void sifive_e_soc_class_init(ObjectClass *oc, void *data) +static void sifive_e_soc_class_init(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); diff --git a/hw/riscv/sifive_u.c b/hw/riscv/sifive_u.c index af5f923..d69f942 100644 --- a/hw/riscv/sifive_u.c +++ b/hw/riscv/sifive_u.c @@ -43,7 +43,6 @@ #include "hw/irq.h" #include "hw/loader.h" #include "hw/sysbus.h" -#include "hw/char/serial.h" #include "hw/cpu/cluster.h" #include "hw/misc/unimp.h" #include "hw/sd/sd.h" @@ -57,9 +56,9 @@ #include "hw/intc/sifive_plic.h" #include "chardev/char.h" #include "net/eth.h" -#include "sysemu/device_tree.h" -#include "sysemu/runstate.h" -#include "sysemu/sysemu.h" +#include "system/device_tree.h" +#include "system/runstate.h" +#include "system/system.h" #include <libfdt.h> @@ -515,17 +514,19 @@ static void sifive_u_machine_init(MachineState *machine) SiFiveUState *s = RISCV_U_MACHINE(machine); MemoryRegion *system_memory = get_system_memory(); MemoryRegion *flash0 = g_new(MemoryRegion, 1); - target_ulong start_addr = memmap[SIFIVE_U_DEV_DRAM].base; + hwaddr start_addr = memmap[SIFIVE_U_DEV_DRAM].base; target_ulong firmware_end_addr, kernel_start_addr; const char *firmware_name; uint32_t start_addr_hi32 = 0x00000000; + uint32_t fdt_load_addr_hi32 = 0x00000000; int i; - uint32_t fdt_load_addr; + uint64_t fdt_load_addr; uint64_t kernel_entry; DriveInfo *dinfo; BlockBackend *blk; DeviceState *flash_dev, *sd_dev, *card_dev; qemu_irq flash_cs, sd_cs; + RISCVBootInfo boot_info; /* Initialize SoC */ object_initialize_child(OBJECT(machine), "soc", &s->soc, TYPE_RISCV_U_SOC); @@ -589,14 +590,15 @@ static void sifive_u_machine_init(MachineState *machine) firmware_name = riscv_default_firmware_name(&s->soc.u_cpus); firmware_end_addr = riscv_find_and_load_firmware(machine, firmware_name, - start_addr, NULL); + &start_addr, NULL); + riscv_boot_info_init(&boot_info, &s->soc.u_cpus); if (machine->kernel_filename) { - kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc.u_cpus, + kernel_start_addr = riscv_calc_kernel_start_addr(&boot_info, firmware_end_addr); - - kernel_entry = riscv_load_kernel(machine, &s->soc.u_cpus, - kernel_start_addr, true, NULL); + riscv_load_kernel(machine, &boot_info, kernel_start_addr, + true, NULL); + kernel_entry = boot_info.image_low_addr; } else { /* * If dynamic firmware is used, it doesn't know where is the next mode @@ -607,11 +609,12 @@ static void sifive_u_machine_init(MachineState *machine) fdt_load_addr = riscv_compute_fdt_addr(memmap[SIFIVE_U_DEV_DRAM].base, memmap[SIFIVE_U_DEV_DRAM].size, - machine); + machine, &boot_info); riscv_load_fdt(fdt_load_addr, machine->fdt); if (!riscv_is_32bit(&s->soc.u_cpus)) { start_addr_hi32 = (uint64_t)start_addr >> 32; + fdt_load_addr_hi32 = fdt_load_addr >> 32; } /* reset vector */ @@ -626,7 +629,7 @@ static void sifive_u_machine_init(MachineState *machine) start_addr, /* start: .dword */ start_addr_hi32, fdt_load_addr, /* fdt_laddr: .dword */ - 0x00000000, + fdt_load_addr_hi32, 0x00000000, /* fw_dyn: */ }; @@ -646,7 +649,8 @@ static void sifive_u_machine_init(MachineState *machine) rom_add_blob_fixed_as("mrom.reset", reset_vec, sizeof(reset_vec), memmap[SIFIVE_U_DEV_MROM].base, &address_space_memory); - riscv_rom_copy_firmware_info(machine, memmap[SIFIVE_U_DEV_MROM].base, + riscv_rom_copy_firmware_info(machine, &s->soc.u_cpus, + memmap[SIFIVE_U_DEV_MROM].base, memmap[SIFIVE_U_DEV_MROM].size, sizeof(reset_vec), kernel_entry); @@ -709,7 +713,7 @@ static void sifive_u_machine_instance_init(Object *obj) object_property_set_description(obj, "serial", "Board serial number"); } -static void sifive_u_machine_class_init(ObjectClass *oc, void *data) +static void sifive_u_machine_class_init(ObjectClass *oc, const void *data) { MachineClass *mc = MACHINE_CLASS(oc); @@ -720,6 +724,7 @@ static void sifive_u_machine_class_init(ObjectClass *oc, void *data) mc->default_cpu_type = SIFIVE_U_CPU; mc->default_cpus = mc->min_cpus; mc->default_ram_id = "riscv.sifive.u.ram"; + mc->auto_create_sdcard = true; object_class_property_add_bool(oc, "start-in-flash", sifive_u_machine_get_start_in_flash, @@ -936,13 +941,12 @@ static void sifive_u_soc_realize(DeviceState *dev, Error **errp) qdev_get_gpio_in(DEVICE(s->plic), SIFIVE_U_QSPI2_IRQ)); } -static Property sifive_u_soc_props[] = { +static const Property sifive_u_soc_props[] = { DEFINE_PROP_UINT32("serial", SiFiveUSoCState, serial, OTP_SERIAL), DEFINE_PROP_STRING("cpu-type", SiFiveUSoCState, cpu_type), - DEFINE_PROP_END_OF_LIST() }; -static void sifive_u_soc_class_init(ObjectClass *oc, void *data) +static void sifive_u_soc_class_init(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); diff --git a/hw/riscv/spike.c b/hw/riscv/spike.c index 6407439..641aae8 100644 --- a/hw/riscv/spike.c +++ b/hw/riscv/spike.c @@ -36,8 +36,8 @@ #include "hw/char/riscv_htif.h" #include "hw/intc/riscv_aclint.h" #include "chardev/char.h" -#include "sysemu/device_tree.h" -#include "sysemu/sysemu.h" +#include "system/device_tree.h" +#include "system/system.h" #include <libfdt.h> @@ -198,13 +198,15 @@ static void spike_board_init(MachineState *machine) MemoryRegion *system_memory = get_system_memory(); MemoryRegion *mask_rom = g_new(MemoryRegion, 1); target_ulong firmware_end_addr = memmap[SPIKE_DRAM].base; + hwaddr firmware_load_addr = memmap[SPIKE_DRAM].base; target_ulong kernel_start_addr; char *firmware_name; - uint32_t fdt_load_addr; + uint64_t fdt_load_addr; uint64_t kernel_entry; char *soc_name; int i, base_hartid, hart_count; bool htif_custom_base = false; + RISCVBootInfo boot_info; /* Check socket count limit */ if (SPIKE_SOCKETS_MAX < riscv_socket_count(machine)) { @@ -290,7 +292,7 @@ static void spike_board_init(MachineState *machine) /* Load firmware */ if (firmware_name) { firmware_end_addr = riscv_load_firmware(firmware_name, - memmap[SPIKE_DRAM].base, + &firmware_load_addr, htif_symbol_callback); g_free(firmware_name); } @@ -299,13 +301,14 @@ static void spike_board_init(MachineState *machine) create_fdt(s, memmap, riscv_is_32bit(&s->soc[0]), htif_custom_base); /* Load kernel */ + riscv_boot_info_init(&boot_info, &s->soc[0]); if (machine->kernel_filename) { - kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0], + kernel_start_addr = riscv_calc_kernel_start_addr(&boot_info, firmware_end_addr); - kernel_entry = riscv_load_kernel(machine, &s->soc[0], - kernel_start_addr, - true, htif_symbol_callback); + riscv_load_kernel(machine, &boot_info, kernel_start_addr, + true, htif_symbol_callback); + kernel_entry = boot_info.image_low_addr; } else { /* * If dynamic firmware is used, it doesn't know where is the next mode @@ -316,11 +319,11 @@ static void spike_board_init(MachineState *machine) fdt_load_addr = riscv_compute_fdt_addr(memmap[SPIKE_DRAM].base, memmap[SPIKE_DRAM].size, - machine); + machine, &boot_info); riscv_load_fdt(fdt_load_addr, machine->fdt); /* load the reset vector */ - riscv_setup_rom_reset_vec(machine, &s->soc[0], memmap[SPIKE_DRAM].base, + riscv_setup_rom_reset_vec(machine, &s->soc[0], firmware_load_addr, memmap[SPIKE_MROM].base, memmap[SPIKE_MROM].size, kernel_entry, fdt_load_addr); @@ -339,7 +342,7 @@ static void spike_machine_instance_init(Object *obj) { } -static void spike_machine_class_init(ObjectClass *oc, void *data) +static void spike_machine_class_init(ObjectClass *oc, const void *data) { MachineClass *mc = MACHINE_CLASS(oc); diff --git a/hw/riscv/trace-events b/hw/riscv/trace-events new file mode 100644 index 0000000..b50b14a --- /dev/null +++ b/hw/riscv/trace-events @@ -0,0 +1,26 @@ +# See documentation at docs/devel/tracing.rst + +# riscv-iommu.c +riscv_iommu_new(const char *id, unsigned b, unsigned d, unsigned f) "%s: device attached %04x:%02x.%d" +riscv_iommu_flt(const char *id, unsigned b, unsigned d, unsigned f, uint64_t reason, uint64_t iova) "%s: fault %04x:%02x.%u reason: 0x%"PRIx64" iova: 0x%"PRIx64 +riscv_iommu_pri(const char *id, unsigned b, unsigned d, unsigned f, uint64_t iova) "%s: page request %04x:%02x.%u iova: 0x%"PRIx64 +riscv_iommu_dma(const char *id, unsigned b, unsigned d, unsigned f, unsigned pasid, const char *dir, uint64_t iova, uint64_t phys) "%s: translate %04x:%02x.%u #%u %s 0x%"PRIx64" -> 0x%"PRIx64 +riscv_iommu_msi(const char *id, unsigned b, unsigned d, unsigned f, uint64_t iova, uint64_t phys) "%s: translate %04x:%02x.%u MSI 0x%"PRIx64" -> 0x%"PRIx64 +riscv_iommu_mrif_notification(const char *id, uint32_t nid, uint64_t phys) "%s: sent MRIF notification 0x%x to 0x%"PRIx64 +riscv_iommu_cmd(const char *id, uint64_t l, uint64_t u) "%s: command 0x%"PRIx64" 0x%"PRIx64 +riscv_iommu_notifier_add(const char *id) "%s: dev-iotlb notifier added" +riscv_iommu_notifier_del(const char *id) "%s: dev-iotlb notifier removed" +riscv_iommu_notify_int_vector(uint32_t cause, uint32_t vector) "Interrupt cause 0x%x sent via vector 0x%x" +riscv_iommu_icvec_write(uint32_t orig, uint32_t actual) "ICVEC write: incoming 0x%x actual 0x%x" +riscv_iommu_ats(const char *id, unsigned b, unsigned d, unsigned f, uint64_t iova) "%s: translate request %04x:%02x.%u iova: 0x%"PRIx64 +riscv_iommu_ats_inval(const char *id) "%s: dev-iotlb invalidate" +riscv_iommu_ats_prgr(const char *id) "%s: dev-iotlb page request group response" +riscv_iommu_sys_irq_sent(uint32_t vector) "IRQ sent to vector %u" +riscv_iommu_sys_msi_sent(uint32_t vector, uint64_t msi_addr, uint32_t msi_data, uint32_t result) "MSI sent to vector %u msi_addr 0x%"PRIx64" msi_data 0x%x result %u" +riscv_iommu_sys_reset_hold(int reset_type) "reset type %d" +riscv_iommu_pci_reset_hold(int reset_type) "reset type %d" +riscv_iommu_hpm_read(uint64_t cycle, uint32_t inhibit, uint64_t ctr_prev, uint64_t ctr_val) "cycle 0x%"PRIx64" inhibit 0x%x ctr_prev 0x%"PRIx64" ctr_val 0x%"PRIx64 +riscv_iommu_hpm_incr_ctr(uint64_t cntr_val) "cntr_val 0x%"PRIx64 +riscv_iommu_hpm_iocntinh_cy(bool prev_cy_inh) "prev_cy_inh %d" +riscv_iommu_hpm_cycle_write(uint32_t ovf, uint64_t val) "ovf 0x%x val 0x%"PRIx64 +riscv_iommu_hpm_evt_write(uint32_t ctr_idx, uint32_t ovf, uint64_t val) "ctr_idx 0x%x ovf 0x%x val 0x%"PRIx64 diff --git a/hw/riscv/trace.h b/hw/riscv/trace.h new file mode 100644 index 0000000..8c0e3ca --- /dev/null +++ b/hw/riscv/trace.h @@ -0,0 +1 @@ +#include "trace/trace-hw_riscv.h" diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c index 0925528..8b5683d 100644 --- a/hw/riscv/virt-acpi-build.c +++ b/hw/riscv/virt-acpi-build.c @@ -38,7 +38,7 @@ #include "migration/vmstate.h" #include "qapi/error.h" #include "qemu/error-report.h" -#include "sysemu/reset.h" +#include "system/reset.h" #define ACPI_BUILD_TABLE_SIZE 0x20000 #define ACPI_BUILD_INTC_ID(socket, index) ((socket << 24) | (index)) @@ -141,12 +141,36 @@ static void acpi_dsdt_add_cpus(Aml *scope, RISCVVirtState *s) } } +static void acpi_dsdt_add_plic_aplic(Aml *scope, uint8_t socket_count, + uint64_t mmio_base, uint64_t mmio_size, + const char *hid) +{ + uint64_t plic_aplic_addr; + uint32_t gsi_base; + uint8_t socket; + + for (socket = 0; socket < socket_count; socket++) { + plic_aplic_addr = mmio_base + mmio_size * socket; + gsi_base = VIRT_IRQCHIP_NUM_SOURCES * socket; + Aml *dev = aml_device("IC%.02X", socket); + aml_append(dev, aml_name_decl("_HID", aml_string("%s", hid))); + aml_append(dev, aml_name_decl("_UID", aml_int(socket))); + aml_append(dev, aml_name_decl("_GSB", aml_int(gsi_base))); + + Aml *crs = aml_resource_template(); + aml_append(crs, aml_memory32_fixed(plic_aplic_addr, mmio_size, + AML_READ_WRITE)); + aml_append(dev, aml_name_decl("_CRS", crs)); + aml_append(scope, dev); + } +} + static void acpi_dsdt_add_uart(Aml *scope, const MemMapEntry *uart_memmap, uint32_t uart_irq) { Aml *dev = aml_device("COM0"); - aml_append(dev, aml_name_decl("_HID", aml_string("PNP0501"))); + aml_append(dev, aml_name_decl("_HID", aml_string("RSCV0003"))); aml_append(dev, aml_name_decl("_UID", aml_int(0))); Aml *crs = aml_resource_template(); @@ -175,15 +199,42 @@ acpi_dsdt_add_uart(Aml *scope, const MemMapEntry *uart_memmap, } /* + * Add DSDT entry for the IOMMU platform device. + * ACPI ID for IOMMU is defined in the section 6.2 of RISC-V BRS spec. + * https://github.com/riscv-non-isa/riscv-brs/releases/download/v0.8/riscv-brs-spec.pdf + */ +static void acpi_dsdt_add_iommu_sys(Aml *scope, const MemMapEntry *iommu_memmap, + uint32_t iommu_irq) +{ + uint32_t i; + + Aml *dev = aml_device("IMU0"); + aml_append(dev, aml_name_decl("_HID", aml_string("RSCV0004"))); + aml_append(dev, aml_name_decl("_UID", aml_int(0))); + + Aml *crs = aml_resource_template(); + aml_append(crs, aml_memory32_fixed(iommu_memmap->base, + iommu_memmap->size, AML_READ_WRITE)); + for (i = iommu_irq; i < iommu_irq + 4; i++) { + aml_append(crs, aml_interrupt(AML_CONSUMER, AML_EDGE, AML_ACTIVE_LOW, + AML_EXCLUSIVE, &i, 1)); + } + + aml_append(dev, aml_name_decl("_CRS", crs)); + aml_append(scope, dev); +} + +/* * Serial Port Console Redirection Table (SPCR) - * Rev: 1.07 + * Rev: 1.10 */ static void spcr_setup(GArray *table_data, BIOSLinker *linker, RISCVVirtState *s) { + const char name[] = "."; AcpiSpcrData serial = { - .interface_type = 0, /* 16550 compatible */ + .interface_type = 0x12, /* 16550 compatible */ .base_addr.id = AML_AS_SYSTEM_MEMORY, .base_addr.width = 32, .base_addr.offset = 0, @@ -205,9 +256,14 @@ spcr_setup(GArray *table_data, BIOSLinker *linker, RISCVVirtState *s) .pci_function = 0, .pci_flags = 0, .pci_segment = 0, + .uart_clk_freq = 0, + .precise_baudrate = 0, + .namespace_string_length = sizeof(name), + .namespace_string_offset = 88, }; - build_spcr(table_data, linker, &serial, 2, s->oem_id, s->oem_table_id); + build_spcr(table_data, linker, &serial, 4, s->oem_id, s->oem_table_id, + name); } /* RHCT Node[N] starts at offset 56 */ @@ -231,7 +287,7 @@ static void build_rhct(GArray *table_data, uint32_t isa_offset, num_rhct_nodes, cmo_offset = 0; RISCVCPU *cpu = &s->soc[0].harts[0]; uint32_t mmu_offset = 0; - uint8_t satp_mode_max; + bool rv32 = riscv_cpu_is_32bit(cpu); g_autofree char *isa = NULL; AcpiTable table = { .sig = "RHCT", .rev = 1, .oem_id = s->oem_id, @@ -251,7 +307,7 @@ static void build_rhct(GArray *table_data, num_rhct_nodes++; } - if (cpu->cfg.satp_mode.supported != 0) { + if (!rv32 && cpu->cfg.max_satp_mode >= VM_1_10_SV39) { num_rhct_nodes++; } @@ -311,22 +367,21 @@ static void build_rhct(GArray *table_data, } /* MMU node structure */ - if (cpu->cfg.satp_mode.supported != 0) { - satp_mode_max = satp_mode_max_from_map(cpu->cfg.satp_mode.map); + if (!rv32 && cpu->cfg.max_satp_mode >= VM_1_10_SV39) { mmu_offset = table_data->len - table.table_offset; build_append_int_noprefix(table_data, 2, 2); /* Type */ build_append_int_noprefix(table_data, 8, 2); /* Length */ build_append_int_noprefix(table_data, 0x1, 2); /* Revision */ build_append_int_noprefix(table_data, 0, 1); /* Reserved */ /* MMU Type */ - if (satp_mode_max == VM_1_10_SV57) { + if (cpu->cfg.max_satp_mode == VM_1_10_SV57) { build_append_int_noprefix(table_data, 2, 1); /* Sv57 */ - } else if (satp_mode_max == VM_1_10_SV48) { + } else if (cpu->cfg.max_satp_mode == VM_1_10_SV48) { build_append_int_noprefix(table_data, 1, 1); /* Sv48 */ - } else if (satp_mode_max == VM_1_10_SV39) { + } else if (cpu->cfg.max_satp_mode == VM_1_10_SV39) { build_append_int_noprefix(table_data, 0, 1); /* Sv39 */ } else { - assert(1); + g_assert_not_reached(); } } @@ -411,7 +466,18 @@ static void build_dsdt(GArray *table_data, socket_count = riscv_socket_count(ms); + if (s->aia_type == VIRT_AIA_TYPE_NONE) { + acpi_dsdt_add_plic_aplic(scope, socket_count, memmap[VIRT_PLIC].base, + memmap[VIRT_PLIC].size, "RSCV0001"); + } else { + acpi_dsdt_add_plic_aplic(scope, socket_count, memmap[VIRT_APLIC_S].base, + memmap[VIRT_APLIC_S].size, "RSCV0002"); + } + acpi_dsdt_add_uart(scope, &memmap[VIRT_UART0], UART0_IRQ); + if (virt_is_iommu_sys_enabled(s)) { + acpi_dsdt_add_iommu_sys(scope, &memmap[VIRT_IOMMU_SYS], IOMMU_SYS_IRQ); + } if (socket_count == 1) { virtio_acpi_dsdt_add(scope, memmap[VIRT_VIRTIO].base, @@ -564,6 +630,187 @@ static void build_madt(GArray *table_data, acpi_table_end(linker, &table); } +#define ID_MAPPING_ENTRY_SIZE 20 +#define IOMMU_ENTRY_SIZE 40 +#define RISCV_INTERRUPT_WIRE_OFFSSET 40 +#define ROOT_COMPLEX_ENTRY_SIZE 20 +#define RIMT_NODE_OFFSET 48 + +/* + * ID Mapping Structure + */ +static void build_rimt_id_mapping(GArray *table_data, uint32_t source_id_base, + uint32_t num_ids, uint32_t dest_id_base) +{ + /* Source ID Base */ + build_append_int_noprefix(table_data, source_id_base, 4); + /* Number of IDs */ + build_append_int_noprefix(table_data, num_ids, 4); + /* Destination Device ID Base */ + build_append_int_noprefix(table_data, source_id_base, 4); + /* Destination IOMMU Offset */ + build_append_int_noprefix(table_data, dest_id_base, 4); + /* Flags */ + build_append_int_noprefix(table_data, 0, 4); +} + +struct AcpiRimtIdMapping { + uint32_t source_id_base; + uint32_t num_ids; +}; +typedef struct AcpiRimtIdMapping AcpiRimtIdMapping; + +/* Build the rimt ID mapping to IOMMU for a given PCI host bridge */ +static int rimt_host_bridges(Object *obj, void *opaque) +{ + GArray *idmap_blob = opaque; + + if (object_dynamic_cast(obj, TYPE_PCI_HOST_BRIDGE)) { + PCIBus *bus = PCI_HOST_BRIDGE(obj)->bus; + + if (bus && !pci_bus_bypass_iommu(bus)) { + int min_bus, max_bus; + + pci_bus_range(bus, &min_bus, &max_bus); + + AcpiRimtIdMapping idmap = { + .source_id_base = min_bus << 8, + .num_ids = (max_bus - min_bus + 1) << 8, + }; + g_array_append_val(idmap_blob, idmap); + } + } + + return 0; +} + +static int rimt_idmap_compare(gconstpointer a, gconstpointer b) +{ + AcpiRimtIdMapping *idmap_a = (AcpiRimtIdMapping *)a; + AcpiRimtIdMapping *idmap_b = (AcpiRimtIdMapping *)b; + + return idmap_a->source_id_base - idmap_b->source_id_base; +} + +/* + * RISC-V IO Mapping Table (RIMT) + * https://github.com/riscv-non-isa/riscv-acpi-rimt/releases/download/v0.99/rimt-spec.pdf + */ +static void build_rimt(GArray *table_data, BIOSLinker *linker, + RISCVVirtState *s) +{ + int i, nb_nodes, rc_mapping_count; + size_t node_size, iommu_offset = 0; + uint32_t id = 0; + g_autoptr(GArray) iommu_idmaps = g_array_new(false, true, + sizeof(AcpiRimtIdMapping)); + + AcpiTable table = { .sig = "RIMT", .rev = 1, .oem_id = s->oem_id, + .oem_table_id = s->oem_table_id }; + + acpi_table_begin(&table, table_data); + + object_child_foreach_recursive(object_get_root(), + rimt_host_bridges, iommu_idmaps); + + /* Sort the ID mapping by Source ID Base*/ + g_array_sort(iommu_idmaps, rimt_idmap_compare); + + nb_nodes = 2; /* RC, IOMMU */ + rc_mapping_count = iommu_idmaps->len; + /* Number of RIMT Nodes */ + build_append_int_noprefix(table_data, nb_nodes, 4); + + /* Offset to Array of RIMT Nodes */ + build_append_int_noprefix(table_data, RIMT_NODE_OFFSET, 4); + build_append_int_noprefix(table_data, 0, 4); /* Reserved */ + + iommu_offset = table_data->len - table.table_offset; + /* IOMMU Device Structure */ + build_append_int_noprefix(table_data, 0, 1); /* Type - IOMMU*/ + build_append_int_noprefix(table_data, 1, 1); /* Revision */ + node_size = IOMMU_ENTRY_SIZE; + build_append_int_noprefix(table_data, node_size, 2); /* Length */ + build_append_int_noprefix(table_data, 0, 2); /* Reserved */ + build_append_int_noprefix(table_data, id++, 2); /* ID */ + if (virt_is_iommu_sys_enabled(s)) { + /* Hardware ID */ + build_append_int_noprefix(table_data, 'R', 1); + build_append_int_noprefix(table_data, 'S', 1); + build_append_int_noprefix(table_data, 'C', 1); + build_append_int_noprefix(table_data, 'V', 1); + build_append_int_noprefix(table_data, '0', 1); + build_append_int_noprefix(table_data, '0', 1); + build_append_int_noprefix(table_data, '0', 1); + build_append_int_noprefix(table_data, '4', 1); + /* Base Address */ + build_append_int_noprefix(table_data, + s->memmap[VIRT_IOMMU_SYS].base, 8); + build_append_int_noprefix(table_data, 0, 4); /* Flags */ + } else { + /* Hardware ID */ + build_append_int_noprefix(table_data, '0', 1); + build_append_int_noprefix(table_data, '0', 1); + build_append_int_noprefix(table_data, '1', 1); + build_append_int_noprefix(table_data, '0', 1); + build_append_int_noprefix(table_data, '0', 1); + build_append_int_noprefix(table_data, '0', 1); + build_append_int_noprefix(table_data, '1', 1); + build_append_int_noprefix(table_data, '4', 1); + + build_append_int_noprefix(table_data, 0, 8); /* Base Address */ + build_append_int_noprefix(table_data, 1, 4); /* Flags */ + } + + build_append_int_noprefix(table_data, 0, 4); /* Proximity Domain */ + build_append_int_noprefix(table_data, 0, 2); /* PCI Segment number */ + /* PCIe B/D/F */ + if (virt_is_iommu_sys_enabled(s)) { + build_append_int_noprefix(table_data, 0, 2); + } else { + build_append_int_noprefix(table_data, s->pci_iommu_bdf, 2); + } + /* Number of interrupt wires */ + build_append_int_noprefix(table_data, 0, 2); + /* Interrupt wire array offset */ + build_append_int_noprefix(table_data, RISCV_INTERRUPT_WIRE_OFFSSET, 2); + + /* PCIe Root Complex Node */ + build_append_int_noprefix(table_data, 1, 1); /* Type */ + build_append_int_noprefix(table_data, 1, 1); /* Revision */ + node_size = ROOT_COMPLEX_ENTRY_SIZE + + ID_MAPPING_ENTRY_SIZE * rc_mapping_count; + build_append_int_noprefix(table_data, node_size, 2); /* Length */ + build_append_int_noprefix(table_data, 0, 2); /* Reserved */ + build_append_int_noprefix(table_data, id++, 2); /* ID */ + build_append_int_noprefix(table_data, 0, 4); /* Flags */ + build_append_int_noprefix(table_data, 0, 2); /* Reserved */ + /* PCI Segment number */ + build_append_int_noprefix(table_data, 0, 2); + /* ID mapping array offset */ + build_append_int_noprefix(table_data, ROOT_COMPLEX_ENTRY_SIZE, 2); + /* Number of ID mappings */ + build_append_int_noprefix(table_data, rc_mapping_count, 2); + + /* Output Reference */ + AcpiRimtIdMapping *range; + + /* ID mapping array */ + for (i = 0; i < iommu_idmaps->len; i++) { + range = &g_array_index(iommu_idmaps, AcpiRimtIdMapping, i); + if (virt_is_iommu_sys_enabled(s)) { + range->source_id_base = 0; + } else { + range->source_id_base = s->pci_iommu_bdf + 1; + } + range->num_ids = 0xffff - s->pci_iommu_bdf; + build_rimt_id_mapping(table_data, range->source_id_base, + range->num_ids, iommu_offset); + } + + acpi_table_end(linker, &table); +} + /* * ACPI spec, Revision 6.5+ * 5.2.16 System Resource Affinity Table (SRAT) @@ -641,6 +888,11 @@ static void virt_acpi_build(RISCVVirtState *s, AcpiBuildTables *tables) acpi_add_table(table_offsets, tables_blob); build_rhct(tables_blob, tables->linker, s); + if (virt_is_iommu_sys_enabled(s) || s->pci_iommu_bdf) { + acpi_add_table(table_offsets, tables_blob); + build_rimt(tables_blob, tables->linker, s); + } + acpi_add_table(table_offsets, tables_blob); spcr_setup(tables_blob, tables->linker, s); diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index 5676d66..cf280a9 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -27,11 +27,13 @@ #include "hw/loader.h" #include "hw/sysbus.h" #include "hw/qdev-properties.h" -#include "hw/char/serial.h" +#include "hw/char/serial-mm.h" #include "target/riscv/cpu.h" #include "hw/core/sysbus-fdt.h" #include "target/riscv/pmu.h" #include "hw/riscv/riscv_hart.h" +#include "hw/riscv/iommu.h" +#include "hw/riscv/riscv-iommu-bits.h" #include "hw/riscv/virt.h" #include "hw/riscv/boot.h" #include "hw/riscv/numa.h" @@ -43,23 +45,33 @@ #include "hw/misc/sifive_test.h" #include "hw/platform-bus.h" #include "chardev/char.h" -#include "sysemu/device_tree.h" -#include "sysemu/sysemu.h" -#include "sysemu/tcg.h" -#include "sysemu/kvm.h" -#include "sysemu/tpm.h" -#include "sysemu/qtest.h" +#include "system/device_tree.h" +#include "system/system.h" +#include "system/tcg.h" +#include "system/kvm.h" +#include "system/tpm.h" +#include "system/qtest.h" #include "hw/pci/pci.h" #include "hw/pci-host/gpex.h" #include "hw/display/ramfb.h" #include "hw/acpi/aml-build.h" #include "qapi/qapi-visit-common.h" #include "hw/virtio/virtio-iommu.h" +#include "hw/uefi/var-service-api.h" /* KVM AIA only supports APLIC MSI. APLIC Wired is always emulated by QEMU. */ -static bool virt_use_kvm_aia(RISCVVirtState *s) +static bool virt_use_kvm_aia_aplic_imsic(RISCVVirtAIAType aia_type) { - return kvm_irqchip_in_kernel() && s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC; + bool msimode = aia_type == VIRT_AIA_TYPE_APLIC_IMSIC; + + return riscv_is_kvm_aia_aplic_imsic(msimode); +} + +static bool virt_use_emulated_aplic(RISCVVirtAIAType aia_type) +{ + bool msimode = aia_type == VIRT_AIA_TYPE_APLIC_IMSIC; + + return riscv_use_emulated_aplic(msimode); } static bool virt_aclint_allowed(void) @@ -75,6 +87,7 @@ static const MemMapEntry virt_memmap[] = { [VIRT_CLINT] = { 0x2000000, 0x10000 }, [VIRT_ACLINT_SSWI] = { 0x2F00000, 0x4000 }, [VIRT_PCIE_PIO] = { 0x3000000, 0x10000 }, + [VIRT_IOMMU_SYS] = { 0x3010000, 0x1000 }, [VIRT_PLATFORM_BUS] = { 0x4000000, 0x2000000 }, [VIRT_PLIC] = { 0xc000000, VIRT_PLIC_SIZE(VIRT_CPUS_MAX * 2) }, [VIRT_APLIC_M] = { 0xc000000, APLIC_SIZE(VIRT_CPUS_MAX) }, @@ -153,8 +166,8 @@ static void virt_flash_map1(PFlashCFI01 *flash, static void virt_flash_map(RISCVVirtState *s, MemoryRegion *sysmem) { - hwaddr flashsize = virt_memmap[VIRT_FLASH].size / 2; - hwaddr flashbase = virt_memmap[VIRT_FLASH].base; + hwaddr flashsize = s->memmap[VIRT_FLASH].size / 2; + hwaddr flashbase = s->memmap[VIRT_FLASH].base; virt_flash_map1(s->flash[0], flashbase, flashsize, sysmem); @@ -167,7 +180,7 @@ static void create_pcie_irq_map(RISCVVirtState *s, void *fdt, char *nodename, { int pin, dev; uint32_t irq_map_stride = 0; - uint32_t full_irq_map[GPEX_NUM_IRQS * GPEX_NUM_IRQS * + uint32_t full_irq_map[PCI_NUM_PINS * PCI_NUM_PINS * FDT_MAX_INT_MAP_WIDTH] = {}; uint32_t *irq_map = full_irq_map; @@ -179,11 +192,11 @@ static void create_pcie_irq_map(RISCVVirtState *s, void *fdt, char *nodename, * possible slot) seeing the interrupt-map-mask will allow the table * to wrap to any number of devices. */ - for (dev = 0; dev < GPEX_NUM_IRQS; dev++) { + for (dev = 0; dev < PCI_NUM_PINS; dev++) { int devfn = dev * 0x8; - for (pin = 0; pin < GPEX_NUM_IRQS; pin++) { - int irq_nr = PCIE_IRQ + ((pin + PCI_SLOT(devfn)) % GPEX_NUM_IRQS); + for (pin = 0; pin < PCI_NUM_PINS; pin++) { + int irq_nr = PCIE_IRQ + ((pin + PCI_SLOT(devfn)) % PCI_NUM_PINS); int i = 0; /* Fill PCI address cells */ @@ -209,7 +222,7 @@ static void create_pcie_irq_map(RISCVVirtState *s, void *fdt, char *nodename, } qemu_fdt_setprop(fdt, nodename, "interrupt-map", full_irq_map, - GPEX_NUM_IRQS * GPEX_NUM_IRQS * + PCI_NUM_PINS * PCI_NUM_PINS * irq_map_stride * sizeof(uint32_t)); qemu_fdt_setprop_cells(fdt, nodename, "interrupt-map-mask", @@ -224,10 +237,10 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int socket, uint32_t cpu_phandle; MachineState *ms = MACHINE(s); bool is_32_bit = riscv_is_32bit(&s->soc[0]); - uint8_t satp_mode_max; for (cpu = s->soc[socket].num_harts - 1; cpu >= 0; cpu--) { RISCVCPU *cpu_ptr = &s->soc[socket].harts[cpu]; + int8_t satp_mode_max = cpu_ptr->cfg.max_satp_mode; g_autofree char *cpu_name = NULL; g_autofree char *core_name = NULL; g_autofree char *intc_name = NULL; @@ -239,8 +252,7 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int socket, s->soc[socket].hartid_base + cpu); qemu_fdt_add_subnode(ms->fdt, cpu_name); - if (cpu_ptr->cfg.satp_mode.supported != 0) { - satp_mode_max = satp_mode_max_from_map(cpu_ptr->cfg.satp_mode.map); + if (satp_mode_max != -1) { sv_name = g_strdup_printf("riscv,%s", satp_mode_str(satp_mode_max, is_32_bit)); qemu_fdt_setprop_string(ms->fdt, cpu_name, "mmu-type", sv_name); @@ -288,16 +300,16 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int socket, } } -static void create_fdt_socket_memory(RISCVVirtState *s, - const MemMapEntry *memmap, int socket) +static void create_fdt_socket_memory(RISCVVirtState *s, int socket) { g_autofree char *mem_name = NULL; - uint64_t addr, size; + hwaddr addr; + uint64_t size; MachineState *ms = MACHINE(s); - addr = memmap[VIRT_DRAM].base + riscv_socket_mem_offset(ms, socket); + addr = s->memmap[VIRT_DRAM].base + riscv_socket_mem_offset(ms, socket); size = riscv_socket_mem_size(ms, socket); - mem_name = g_strdup_printf("/memory@%lx", (long)addr); + mem_name = g_strdup_printf("/memory@%"HWADDR_PRIx, addr); qemu_fdt_add_subnode(ms->fdt, mem_name); qemu_fdt_setprop_cells(ms->fdt, mem_name, "reg", addr >> 32, addr, size >> 32, size); @@ -306,7 +318,7 @@ static void create_fdt_socket_memory(RISCVVirtState *s, } static void create_fdt_socket_clint(RISCVVirtState *s, - const MemMapEntry *memmap, int socket, + int socket, uint32_t *intc_phandles) { int cpu; @@ -327,21 +339,22 @@ static void create_fdt_socket_clint(RISCVVirtState *s, clint_cells[cpu * 4 + 3] = cpu_to_be32(IRQ_M_TIMER); } - clint_addr = memmap[VIRT_CLINT].base + (memmap[VIRT_CLINT].size * socket); + clint_addr = s->memmap[VIRT_CLINT].base + + (s->memmap[VIRT_CLINT].size * socket); clint_name = g_strdup_printf("/soc/clint@%lx", clint_addr); qemu_fdt_add_subnode(ms->fdt, clint_name); qemu_fdt_setprop_string_array(ms->fdt, clint_name, "compatible", (char **)&clint_compat, ARRAY_SIZE(clint_compat)); qemu_fdt_setprop_cells(ms->fdt, clint_name, "reg", - 0x0, clint_addr, 0x0, memmap[VIRT_CLINT].size); + 0x0, clint_addr, 0x0, s->memmap[VIRT_CLINT].size); qemu_fdt_setprop(ms->fdt, clint_name, "interrupts-extended", clint_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 4); riscv_socket_fdt_write_id(ms, clint_name, socket); } static void create_fdt_socket_aclint(RISCVVirtState *s, - const MemMapEntry *memmap, int socket, + int socket, uint32_t *intc_phandles) { int cpu; @@ -368,8 +381,10 @@ static void create_fdt_socket_aclint(RISCVVirtState *s, aclint_cells_size = s->soc[socket].num_harts * sizeof(uint32_t) * 2; if (s->aia_type != VIRT_AIA_TYPE_APLIC_IMSIC) { - addr = memmap[VIRT_CLINT].base + (memmap[VIRT_CLINT].size * socket); + addr = s->memmap[VIRT_CLINT].base + + (s->memmap[VIRT_CLINT].size * socket); name = g_strdup_printf("/soc/mswi@%lx", addr); + qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "riscv,aclint-mswi"); @@ -384,13 +399,13 @@ static void create_fdt_socket_aclint(RISCVVirtState *s, } if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) { - addr = memmap[VIRT_CLINT].base + + addr = s->memmap[VIRT_CLINT].base + (RISCV_ACLINT_DEFAULT_MTIMER_SIZE * socket); size = RISCV_ACLINT_DEFAULT_MTIMER_SIZE; } else { - addr = memmap[VIRT_CLINT].base + RISCV_ACLINT_SWI_SIZE + - (memmap[VIRT_CLINT].size * socket); - size = memmap[VIRT_CLINT].size - RISCV_ACLINT_SWI_SIZE; + addr = s->memmap[VIRT_CLINT].base + RISCV_ACLINT_SWI_SIZE + + (s->memmap[VIRT_CLINT].size * socket); + size = s->memmap[VIRT_CLINT].size - RISCV_ACLINT_SWI_SIZE; } name = g_strdup_printf("/soc/mtimer@%lx", addr); qemu_fdt_add_subnode(ms->fdt, name); @@ -407,14 +422,15 @@ static void create_fdt_socket_aclint(RISCVVirtState *s, g_free(name); if (s->aia_type != VIRT_AIA_TYPE_APLIC_IMSIC) { - addr = memmap[VIRT_ACLINT_SSWI].base + - (memmap[VIRT_ACLINT_SSWI].size * socket); + addr = s->memmap[VIRT_ACLINT_SSWI].base + + (s->memmap[VIRT_ACLINT_SSWI].size * socket); + name = g_strdup_printf("/soc/sswi@%lx", addr); qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "riscv,aclint-sswi"); qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, addr, 0x0, memmap[VIRT_ACLINT_SSWI].size); + 0x0, addr, 0x0, s->memmap[VIRT_ACLINT_SSWI].size); qemu_fdt_setprop(ms->fdt, name, "interrupts-extended", aclint_sswi_cells, aclint_cells_size); qemu_fdt_setprop(ms->fdt, name, "interrupt-controller", NULL, 0); @@ -425,7 +441,7 @@ static void create_fdt_socket_aclint(RISCVVirtState *s, } static void create_fdt_socket_plic(RISCVVirtState *s, - const MemMapEntry *memmap, int socket, + int socket, uint32_t *phandle, uint32_t *intc_phandles, uint32_t *plic_phandles) { @@ -439,7 +455,8 @@ static void create_fdt_socket_plic(RISCVVirtState *s, }; plic_phandles[socket] = (*phandle)++; - plic_addr = memmap[VIRT_PLIC].base + (memmap[VIRT_PLIC].size * socket); + plic_addr = s->memmap[VIRT_PLIC].base + + (s->memmap[VIRT_PLIC].size * socket); plic_name = g_strdup_printf("/soc/plic@%lx", plic_addr); qemu_fdt_add_subnode(ms->fdt, plic_name); qemu_fdt_setprop_cell(ms->fdt, plic_name, @@ -478,7 +495,7 @@ static void create_fdt_socket_plic(RISCVVirtState *s, } qemu_fdt_setprop_cells(ms->fdt, plic_name, "reg", - 0x0, plic_addr, 0x0, memmap[VIRT_PLIC].size); + 0x0, plic_addr, 0x0, s->memmap[VIRT_PLIC].size); qemu_fdt_setprop_cell(ms->fdt, plic_name, "riscv,ndev", VIRT_IRQCHIP_NUM_SOURCES - 1); riscv_socket_fdt_write_id(ms, plic_name, socket); @@ -487,8 +504,8 @@ static void create_fdt_socket_plic(RISCVVirtState *s, if (!socket) { platform_bus_add_all_fdt_nodes(ms->fdt, plic_name, - memmap[VIRT_PLATFORM_BUS].base, - memmap[VIRT_PLATFORM_BUS].size, + s->memmap[VIRT_PLATFORM_BUS].base, + s->memmap[VIRT_PLATFORM_BUS].size, VIRT_PLATFORM_BUS_IRQ); } } @@ -515,6 +532,9 @@ static void create_fdt_one_imsic(RISCVVirtState *s, hwaddr base_addr, uint32_t imsic_max_hart_per_socket, imsic_addr, imsic_size; g_autofree uint32_t *imsic_cells = NULL; g_autofree uint32_t *imsic_regs = NULL; + static const char * const imsic_compat[2] = { + "qemu,imsics", "riscv,imsics" + }; imsic_cells = g_new0(uint32_t, ms->smp.cpus * 2); imsic_regs = g_new0(uint32_t, socket_count * 4); @@ -538,9 +558,13 @@ static void create_fdt_one_imsic(RISCVVirtState *s, hwaddr base_addr, } } - imsic_name = g_strdup_printf("/soc/imsics@%lx", (unsigned long)base_addr); + imsic_name = g_strdup_printf("/soc/interrupt-controller@%lx", + (unsigned long)base_addr); qemu_fdt_add_subnode(ms->fdt, imsic_name); - qemu_fdt_setprop_string(ms->fdt, imsic_name, "compatible", "riscv,imsics"); + qemu_fdt_setprop_string_array(ms->fdt, imsic_name, "compatible", + (char **)&imsic_compat, + ARRAY_SIZE(imsic_compat)); + qemu_fdt_setprop_cell(ms->fdt, imsic_name, "#interrupt-cells", FDT_IMSIC_INT_CELLS); qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller", NULL, 0); @@ -568,7 +592,7 @@ static void create_fdt_one_imsic(RISCVVirtState *s, hwaddr base_addr, qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", msi_phandle); } -static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap, +static void create_fdt_imsic(RISCVVirtState *s, uint32_t *phandle, uint32_t *intc_phandles, uint32_t *msi_m_phandle, uint32_t *msi_s_phandle) { @@ -577,17 +601,23 @@ static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap, if (!kvm_enabled()) { /* M-level IMSIC node */ - create_fdt_one_imsic(s, memmap[VIRT_IMSIC_M].base, intc_phandles, + create_fdt_one_imsic(s, s->memmap[VIRT_IMSIC_M].base, intc_phandles, *msi_m_phandle, true, 0); } /* S-level IMSIC node */ - create_fdt_one_imsic(s, memmap[VIRT_IMSIC_S].base, intc_phandles, + create_fdt_one_imsic(s, s->memmap[VIRT_IMSIC_S].base, intc_phandles, *msi_s_phandle, false, imsic_num_bits(s->aia_guests + 1)); } +/* Caller must free string after use */ +static char *fdt_get_aplic_nodename(unsigned long aplic_addr) +{ + return g_strdup_printf("/soc/interrupt-controller@%lx", aplic_addr); +} + static void create_fdt_one_aplic(RISCVVirtState *s, int socket, unsigned long aplic_addr, uint32_t aplic_size, uint32_t msi_phandle, @@ -597,18 +627,24 @@ static void create_fdt_one_aplic(RISCVVirtState *s, int socket, bool m_mode, int num_harts) { int cpu; - g_autofree char *aplic_name = NULL; + g_autofree char *aplic_name = fdt_get_aplic_nodename(aplic_addr); g_autofree uint32_t *aplic_cells = g_new0(uint32_t, num_harts * 2); MachineState *ms = MACHINE(s); + static const char * const aplic_compat[2] = { + "qemu,aplic", "riscv,aplic" + }; for (cpu = 0; cpu < num_harts; cpu++) { aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); aplic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT : IRQ_S_EXT); } - aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr); qemu_fdt_add_subnode(ms->fdt, aplic_name); - qemu_fdt_setprop_string(ms->fdt, aplic_name, "compatible", "riscv,aplic"); + qemu_fdt_setprop_string_array(ms->fdt, aplic_name, "compatible", + (char **)&aplic_compat, + ARRAY_SIZE(aplic_compat)); + qemu_fdt_setprop_cell(ms->fdt, aplic_name, "#address-cells", + FDT_APLIC_ADDR_CELLS); qemu_fdt_setprop_cell(ms->fdt, aplic_name, "#interrupt-cells", FDT_APLIC_INT_CELLS); qemu_fdt_setprop(ms->fdt, aplic_name, "interrupt-controller", NULL, 0); @@ -628,6 +664,15 @@ static void create_fdt_one_aplic(RISCVVirtState *s, int socket, if (aplic_child_phandle) { qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,children", aplic_child_phandle); + qemu_fdt_setprop_cells(ms->fdt, aplic_name, "riscv,delegation", + aplic_child_phandle, 0x1, + VIRT_IRQCHIP_NUM_SOURCES); + /* + * DEPRECATED_9.1: Compat property kept temporarily + * to allow old firmwares to work with AIA. Do *not* + * use 'riscv,delegate' in new code: use + * 'riscv,delegation' instead. + */ qemu_fdt_setprop_cells(ms->fdt, aplic_name, "riscv,delegate", aplic_child_phandle, 0x1, VIRT_IRQCHIP_NUM_SOURCES); @@ -638,7 +683,7 @@ static void create_fdt_one_aplic(RISCVVirtState *s, int socket, } static void create_fdt_socket_aplic(RISCVVirtState *s, - const MemMapEntry *memmap, int socket, + int socket, uint32_t msi_m_phandle, uint32_t msi_s_phandle, uint32_t *phandle, @@ -646,7 +691,6 @@ static void create_fdt_socket_aplic(RISCVVirtState *s, uint32_t *aplic_phandles, int num_harts) { - g_autofree char *aplic_name = NULL; unsigned long aplic_addr; MachineState *ms = MACHINE(s); uint32_t aplic_m_phandle, aplic_s_phandle; @@ -656,28 +700,28 @@ static void create_fdt_socket_aplic(RISCVVirtState *s, if (!kvm_enabled()) { /* M-level APLIC node */ - aplic_addr = memmap[VIRT_APLIC_M].base + - (memmap[VIRT_APLIC_M].size * socket); - create_fdt_one_aplic(s, socket, aplic_addr, memmap[VIRT_APLIC_M].size, + aplic_addr = s->memmap[VIRT_APLIC_M].base + + (s->memmap[VIRT_APLIC_M].size * socket); + create_fdt_one_aplic(s, socket, aplic_addr, + s->memmap[VIRT_APLIC_M].size, msi_m_phandle, intc_phandles, aplic_m_phandle, aplic_s_phandle, true, num_harts); } /* S-level APLIC node */ - aplic_addr = memmap[VIRT_APLIC_S].base + - (memmap[VIRT_APLIC_S].size * socket); - create_fdt_one_aplic(s, socket, aplic_addr, memmap[VIRT_APLIC_S].size, + aplic_addr = s->memmap[VIRT_APLIC_S].base + + (s->memmap[VIRT_APLIC_S].size * socket); + create_fdt_one_aplic(s, socket, aplic_addr, s->memmap[VIRT_APLIC_S].size, msi_s_phandle, intc_phandles, aplic_s_phandle, 0, false, num_harts); - aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr); - if (!socket) { + g_autofree char *aplic_name = fdt_get_aplic_nodename(aplic_addr); platform_bus_add_all_fdt_nodes(ms->fdt, aplic_name, - memmap[VIRT_PLATFORM_BUS].base, - memmap[VIRT_PLATFORM_BUS].size, + s->memmap[VIRT_PLATFORM_BUS].base, + s->memmap[VIRT_PLATFORM_BUS].size, VIRT_PLATFORM_BUS_IRQ); } @@ -695,7 +739,7 @@ static void create_fdt_pmu(RISCVVirtState *s) riscv_pmu_generate_fdt_node(ms->fdt, hart.pmu_avail_ctrs, pmu_name); } -static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap, +static void create_fdt_sockets(RISCVVirtState *s, uint32_t *phandle, uint32_t *irq_mmio_phandle, uint32_t *irq_pcie_phandle, @@ -712,7 +756,7 @@ static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap, qemu_fdt_add_subnode(ms->fdt, "/cpus"); qemu_fdt_setprop_cell(ms->fdt, "/cpus", "timebase-frequency", kvm_enabled() ? - kvm_riscv_get_timebase_frequency(first_cpu) : + kvm_riscv_get_timebase_frequency(&s->soc->harts[0]) : RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ); qemu_fdt_setprop_cell(ms->fdt, "/cpus", "#size-cells", 0x0); qemu_fdt_setprop_cell(ms->fdt, "/cpus", "#address-cells", 0x1); @@ -731,53 +775,54 @@ static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap, create_fdt_socket_cpus(s, socket, clust_name, phandle, &intc_phandles[phandle_pos]); - create_fdt_socket_memory(s, memmap, socket); + create_fdt_socket_memory(s, socket); if (virt_aclint_allowed() && s->have_aclint) { - create_fdt_socket_aclint(s, memmap, socket, + create_fdt_socket_aclint(s, socket, &intc_phandles[phandle_pos]); } else if (tcg_enabled()) { - create_fdt_socket_clint(s, memmap, socket, + create_fdt_socket_clint(s, socket, &intc_phandles[phandle_pos]); } } if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) { - create_fdt_imsic(s, memmap, phandle, intc_phandles, - &msi_m_phandle, &msi_s_phandle); + create_fdt_imsic(s, phandle, intc_phandles, + &msi_m_phandle, &msi_s_phandle); *msi_pcie_phandle = msi_s_phandle; } - /* KVM AIA only has one APLIC instance */ - if (kvm_enabled() && virt_use_kvm_aia(s)) { - create_fdt_socket_aplic(s, memmap, 0, + /* + * With KVM AIA aplic-imsic, using an irqchip without split + * mode, we'll use only one APLIC instance. + */ + if (!virt_use_emulated_aplic(s->aia_type)) { + create_fdt_socket_aplic(s, 0, msi_m_phandle, msi_s_phandle, phandle, &intc_phandles[0], xplic_phandles, ms->smp.cpus); + + *irq_mmio_phandle = xplic_phandles[0]; + *irq_virtio_phandle = xplic_phandles[0]; + *irq_pcie_phandle = xplic_phandles[0]; } else { phandle_pos = ms->smp.cpus; for (socket = (socket_count - 1); socket >= 0; socket--) { phandle_pos -= s->soc[socket].num_harts; if (s->aia_type == VIRT_AIA_TYPE_NONE) { - create_fdt_socket_plic(s, memmap, socket, phandle, + create_fdt_socket_plic(s, socket, phandle, &intc_phandles[phandle_pos], xplic_phandles); } else { - create_fdt_socket_aplic(s, memmap, socket, + create_fdt_socket_aplic(s, socket, msi_m_phandle, msi_s_phandle, phandle, &intc_phandles[phandle_pos], xplic_phandles, s->soc[socket].num_harts); } } - } - if (kvm_enabled() && virt_use_kvm_aia(s)) { - *irq_mmio_phandle = xplic_phandles[0]; - *irq_virtio_phandle = xplic_phandles[0]; - *irq_pcie_phandle = xplic_phandles[0]; - } else { for (socket = 0; socket < socket_count; socket++) { if (socket == 0) { *irq_mmio_phandle = xplic_phandles[socket]; @@ -797,21 +842,24 @@ static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap, riscv_socket_fdt_write_distance_matrix(ms); } -static void create_fdt_virtio(RISCVVirtState *s, const MemMapEntry *memmap, - uint32_t irq_virtio_phandle) +static void create_fdt_virtio(RISCVVirtState *s, uint32_t irq_virtio_phandle) { int i; MachineState *ms = MACHINE(s); + hwaddr virtio_base = s->memmap[VIRT_VIRTIO].base; for (i = 0; i < VIRTIO_COUNT; i++) { - g_autofree char *name = g_strdup_printf("/soc/virtio_mmio@%lx", - (long)(memmap[VIRT_VIRTIO].base + i * memmap[VIRT_VIRTIO].size)); + g_autofree char *name = NULL; + uint64_t size = s->memmap[VIRT_VIRTIO].size; + hwaddr addr = virtio_base + i * size; + + name = g_strdup_printf("/soc/virtio_mmio@%"HWADDR_PRIx, addr); qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "virtio,mmio"); qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, memmap[VIRT_VIRTIO].base + i * memmap[VIRT_VIRTIO].size, - 0x0, memmap[VIRT_VIRTIO].size); + 0x0, addr, + 0x0, size); qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent", irq_virtio_phandle); if (s->aia_type == VIRT_AIA_TYPE_NONE) { @@ -824,15 +872,16 @@ static void create_fdt_virtio(RISCVVirtState *s, const MemMapEntry *memmap, } } -static void create_fdt_pcie(RISCVVirtState *s, const MemMapEntry *memmap, +static void create_fdt_pcie(RISCVVirtState *s, uint32_t irq_pcie_phandle, - uint32_t msi_pcie_phandle) + uint32_t msi_pcie_phandle, + uint32_t iommu_sys_phandle) { g_autofree char *name = NULL; MachineState *ms = MACHINE(s); - name = g_strdup_printf("/soc/pci@%lx", - (long) memmap[VIRT_PCIE_ECAM].base); + name = g_strdup_printf("/soc/pci@%"HWADDR_PRIx, + s->memmap[VIRT_PCIE_ECAM].base); qemu_fdt_setprop_cell(ms->fdt, name, "#address-cells", FDT_PCI_ADDR_CELLS); qemu_fdt_setprop_cell(ms->fdt, name, "#interrupt-cells", @@ -843,36 +892,41 @@ static void create_fdt_pcie(RISCVVirtState *s, const MemMapEntry *memmap, qemu_fdt_setprop_string(ms->fdt, name, "device_type", "pci"); qemu_fdt_setprop_cell(ms->fdt, name, "linux,pci-domain", 0); qemu_fdt_setprop_cells(ms->fdt, name, "bus-range", 0, - memmap[VIRT_PCIE_ECAM].size / PCIE_MMCFG_SIZE_MIN - 1); + s->memmap[VIRT_PCIE_ECAM].size / PCIE_MMCFG_SIZE_MIN - 1); qemu_fdt_setprop(ms->fdt, name, "dma-coherent", NULL, 0); if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) { qemu_fdt_setprop_cell(ms->fdt, name, "msi-parent", msi_pcie_phandle); } qemu_fdt_setprop_cells(ms->fdt, name, "reg", 0, - memmap[VIRT_PCIE_ECAM].base, 0, memmap[VIRT_PCIE_ECAM].size); + s->memmap[VIRT_PCIE_ECAM].base, 0, s->memmap[VIRT_PCIE_ECAM].size); qemu_fdt_setprop_sized_cells(ms->fdt, name, "ranges", 1, FDT_PCI_RANGE_IOPORT, 2, 0, - 2, memmap[VIRT_PCIE_PIO].base, 2, memmap[VIRT_PCIE_PIO].size, + 2, s->memmap[VIRT_PCIE_PIO].base, 2, s->memmap[VIRT_PCIE_PIO].size, 1, FDT_PCI_RANGE_MMIO, - 2, memmap[VIRT_PCIE_MMIO].base, - 2, memmap[VIRT_PCIE_MMIO].base, 2, memmap[VIRT_PCIE_MMIO].size, + 2, s->memmap[VIRT_PCIE_MMIO].base, + 2, s->memmap[VIRT_PCIE_MMIO].base, 2, s->memmap[VIRT_PCIE_MMIO].size, 1, FDT_PCI_RANGE_MMIO_64BIT, 2, virt_high_pcie_memmap.base, 2, virt_high_pcie_memmap.base, 2, virt_high_pcie_memmap.size); + if (virt_is_iommu_sys_enabled(s)) { + qemu_fdt_setprop_cells(ms->fdt, name, "iommu-map", + 0, iommu_sys_phandle, 0, 0, 0, + iommu_sys_phandle, 0, 0xffff); + } + create_pcie_irq_map(s, ms->fdt, name, irq_pcie_phandle); } -static void create_fdt_reset(RISCVVirtState *s, const MemMapEntry *memmap, - uint32_t *phandle) +static void create_fdt_reset(RISCVVirtState *s, uint32_t *phandle) { char *name; uint32_t test_phandle; MachineState *ms = MACHINE(s); test_phandle = (*phandle)++; - name = g_strdup_printf("/soc/test@%lx", - (long)memmap[VIRT_TEST].base); + name = g_strdup_printf("/soc/test@%"HWADDR_PRIx, + s->memmap[VIRT_TEST].base); qemu_fdt_add_subnode(ms->fdt, name); { static const char * const compat[3] = { @@ -882,7 +936,7 @@ static void create_fdt_reset(RISCVVirtState *s, const MemMapEntry *memmap, (char **)&compat, ARRAY_SIZE(compat)); } qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, memmap[VIRT_TEST].base, 0x0, memmap[VIRT_TEST].size); + 0x0, s->memmap[VIRT_TEST].base, 0x0, s->memmap[VIRT_TEST].size); qemu_fdt_setprop_cell(ms->fdt, name, "phandle", test_phandle); test_phandle = qemu_fdt_get_phandle(ms->fdt, name); g_free(name); @@ -904,18 +958,19 @@ static void create_fdt_reset(RISCVVirtState *s, const MemMapEntry *memmap, g_free(name); } -static void create_fdt_uart(RISCVVirtState *s, const MemMapEntry *memmap, +static void create_fdt_uart(RISCVVirtState *s, uint32_t irq_mmio_phandle) { g_autofree char *name = NULL; MachineState *ms = MACHINE(s); - name = g_strdup_printf("/soc/serial@%lx", (long)memmap[VIRT_UART0].base); + name = g_strdup_printf("/soc/serial@%"HWADDR_PRIx, + s->memmap[VIRT_UART0].base); qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "ns16550a"); qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, memmap[VIRT_UART0].base, - 0x0, memmap[VIRT_UART0].size); + 0x0, s->memmap[VIRT_UART0].base, + 0x0, s->memmap[VIRT_UART0].size); qemu_fdt_setprop_cell(ms->fdt, name, "clock-frequency", 3686400); qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent", irq_mmio_phandle); if (s->aia_type == VIRT_AIA_TYPE_NONE) { @@ -925,20 +980,22 @@ static void create_fdt_uart(RISCVVirtState *s, const MemMapEntry *memmap, } qemu_fdt_setprop_string(ms->fdt, "/chosen", "stdout-path", name); + qemu_fdt_setprop_string(ms->fdt, "/aliases", "serial0", name); } -static void create_fdt_rtc(RISCVVirtState *s, const MemMapEntry *memmap, +static void create_fdt_rtc(RISCVVirtState *s, uint32_t irq_mmio_phandle) { g_autofree char *name = NULL; MachineState *ms = MACHINE(s); - name = g_strdup_printf("/soc/rtc@%lx", (long)memmap[VIRT_RTC].base); + name = g_strdup_printf("/soc/rtc@%"HWADDR_PRIx, + s->memmap[VIRT_RTC].base); qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "google,goldfish-rtc"); qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, memmap[VIRT_RTC].base, 0x0, memmap[VIRT_RTC].size); + 0x0, s->memmap[VIRT_RTC].base, 0x0, s->memmap[VIRT_RTC].size); qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent", irq_mmio_phandle); if (s->aia_type == VIRT_AIA_TYPE_NONE) { @@ -948,11 +1005,11 @@ static void create_fdt_rtc(RISCVVirtState *s, const MemMapEntry *memmap, } } -static void create_fdt_flash(RISCVVirtState *s, const MemMapEntry *memmap) +static void create_fdt_flash(RISCVVirtState *s) { MachineState *ms = MACHINE(s); - hwaddr flashsize = virt_memmap[VIRT_FLASH].size / 2; - hwaddr flashbase = virt_memmap[VIRT_FLASH].base; + hwaddr flashsize = s->memmap[VIRT_FLASH].size / 2; + hwaddr flashbase = s->memmap[VIRT_FLASH].base; g_autofree char *name = g_strdup_printf("/flash@%" PRIx64, flashbase); qemu_fdt_add_subnode(ms->fdt, name); @@ -963,11 +1020,11 @@ static void create_fdt_flash(RISCVVirtState *s, const MemMapEntry *memmap) qemu_fdt_setprop_cell(ms->fdt, name, "bank-width", 4); } -static void create_fdt_fw_cfg(RISCVVirtState *s, const MemMapEntry *memmap) +static void create_fdt_fw_cfg(RISCVVirtState *s) { MachineState *ms = MACHINE(s); - hwaddr base = memmap[VIRT_FW_CFG].base; - hwaddr size = memmap[VIRT_FW_CFG].size; + hwaddr base = s->memmap[VIRT_FW_CFG].base; + hwaddr size = s->memmap[VIRT_FW_CFG].size; g_autofree char *nodename = g_strdup_printf("/fw-cfg@%" PRIx64, base); qemu_fdt_add_subnode(ms->fdt, nodename); @@ -986,8 +1043,8 @@ static void create_fdt_virtio_iommu(RISCVVirtState *s, uint16_t bdf) g_autofree char *iommu_node = NULL; g_autofree char *pci_node = NULL; - pci_node = g_strdup_printf("/soc/pci@%lx", - (long) virt_memmap[VIRT_PCIE_ECAM].base); + pci_node = g_strdup_printf("/soc/pci@%"HWADDR_PRIx, + s->memmap[VIRT_PCIE_ECAM].base); iommu_node = g_strdup_printf("%s/virtio_iommu@%x,%x", pci_node, PCI_SLOT(bdf), PCI_FUNC(bdf)); iommu_phandle = qemu_fdt_alloc_phandle(fdt); @@ -1006,27 +1063,99 @@ static void create_fdt_virtio_iommu(RISCVVirtState *s, uint16_t bdf) bdf + 1, iommu_phandle, bdf + 1, 0xffff - bdf); } +static void create_fdt_iommu_sys(RISCVVirtState *s, uint32_t irq_chip, + uint32_t msi_phandle, + uint32_t *iommu_sys_phandle) +{ + const char comp[] = "riscv,iommu"; + void *fdt = MACHINE(s)->fdt; + uint32_t iommu_phandle; + g_autofree char *iommu_node = NULL; + hwaddr addr = s->memmap[VIRT_IOMMU_SYS].base; + hwaddr size = s->memmap[VIRT_IOMMU_SYS].size; + uint32_t iommu_irq_map[RISCV_IOMMU_INTR_COUNT] = { + IOMMU_SYS_IRQ + RISCV_IOMMU_INTR_CQ, + IOMMU_SYS_IRQ + RISCV_IOMMU_INTR_FQ, + IOMMU_SYS_IRQ + RISCV_IOMMU_INTR_PM, + IOMMU_SYS_IRQ + RISCV_IOMMU_INTR_PQ, + }; + + iommu_node = g_strdup_printf("/soc/iommu@%x", + (unsigned int) s->memmap[VIRT_IOMMU_SYS].base); + iommu_phandle = qemu_fdt_alloc_phandle(fdt); + qemu_fdt_add_subnode(fdt, iommu_node); + + qemu_fdt_setprop(fdt, iommu_node, "compatible", comp, sizeof(comp)); + qemu_fdt_setprop_cell(fdt, iommu_node, "#iommu-cells", 1); + qemu_fdt_setprop_cell(fdt, iommu_node, "phandle", iommu_phandle); + + qemu_fdt_setprop_cells(fdt, iommu_node, "reg", + addr >> 32, addr, size >> 32, size); + qemu_fdt_setprop_cell(fdt, iommu_node, "interrupt-parent", irq_chip); + + qemu_fdt_setprop_cells(fdt, iommu_node, "interrupts", + iommu_irq_map[0], FDT_IRQ_TYPE_EDGE_LOW, + iommu_irq_map[1], FDT_IRQ_TYPE_EDGE_LOW, + iommu_irq_map[2], FDT_IRQ_TYPE_EDGE_LOW, + iommu_irq_map[3], FDT_IRQ_TYPE_EDGE_LOW); + + qemu_fdt_setprop_cell(fdt, iommu_node, "msi-parent", msi_phandle); + + *iommu_sys_phandle = iommu_phandle; +} + +static void create_fdt_iommu(RISCVVirtState *s, uint16_t bdf) +{ + const char comp[] = "riscv,pci-iommu"; + void *fdt = MACHINE(s)->fdt; + uint32_t iommu_phandle; + g_autofree char *iommu_node = NULL; + g_autofree char *pci_node = NULL; + + pci_node = g_strdup_printf("/soc/pci@%"HWADDR_PRIx, + s->memmap[VIRT_PCIE_ECAM].base); + iommu_node = g_strdup_printf("%s/iommu@%x", pci_node, bdf); + iommu_phandle = qemu_fdt_alloc_phandle(fdt); + qemu_fdt_add_subnode(fdt, iommu_node); + + qemu_fdt_setprop(fdt, iommu_node, "compatible", comp, sizeof(comp)); + qemu_fdt_setprop_cell(fdt, iommu_node, "#iommu-cells", 1); + qemu_fdt_setprop_cell(fdt, iommu_node, "phandle", iommu_phandle); + qemu_fdt_setprop_cells(fdt, iommu_node, "reg", + bdf << 8, 0, 0, 0, 0); + qemu_fdt_setprop_cells(fdt, pci_node, "iommu-map", + 0, iommu_phandle, 0, bdf, + bdf + 1, iommu_phandle, bdf + 1, 0xffff - bdf); + s->pci_iommu_bdf = bdf; +} + static void finalize_fdt(RISCVVirtState *s) { uint32_t phandle = 1, irq_mmio_phandle = 1, msi_pcie_phandle = 1; uint32_t irq_pcie_phandle = 1, irq_virtio_phandle = 1; + uint32_t iommu_sys_phandle = 1; - create_fdt_sockets(s, virt_memmap, &phandle, &irq_mmio_phandle, + create_fdt_sockets(s, &phandle, &irq_mmio_phandle, &irq_pcie_phandle, &irq_virtio_phandle, &msi_pcie_phandle); - create_fdt_virtio(s, virt_memmap, irq_virtio_phandle); + create_fdt_virtio(s, irq_virtio_phandle); - create_fdt_pcie(s, virt_memmap, irq_pcie_phandle, msi_pcie_phandle); + if (virt_is_iommu_sys_enabled(s)) { + create_fdt_iommu_sys(s, irq_mmio_phandle, msi_pcie_phandle, + &iommu_sys_phandle); + } + create_fdt_pcie(s, irq_pcie_phandle, msi_pcie_phandle, + iommu_sys_phandle); - create_fdt_reset(s, virt_memmap, &phandle); + create_fdt_reset(s, &phandle); - create_fdt_uart(s, virt_memmap, irq_mmio_phandle); + create_fdt_uart(s, irq_mmio_phandle); - create_fdt_rtc(s, virt_memmap, irq_mmio_phandle); + create_fdt_rtc(s, irq_mmio_phandle); } -static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap) +static void create_fdt(RISCVVirtState *s) { MachineState *ms = MACHINE(s); uint8_t rng_seed[32]; @@ -1053,7 +1182,8 @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap) * The "/soc/pci@..." node is needed for PCIE hotplugs * that might happen before finalize_fdt(). */ - name = g_strdup_printf("/soc/pci@%lx", (long) memmap[VIRT_PCIE_ECAM].base); + name = g_strdup_printf("/soc/pci@%"HWADDR_PRIx, + s->memmap[VIRT_PCIE_ECAM].base); qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_add_subnode(ms->fdt, "/chosen"); @@ -1063,8 +1193,10 @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap) qemu_fdt_setprop(ms->fdt, "/chosen", "rng-seed", rng_seed, sizeof(rng_seed)); - create_fdt_flash(s, memmap); - create_fdt_fw_cfg(s, memmap); + qemu_fdt_add_subnode(ms->fdt, "/aliases"); + + create_fdt_flash(s); + create_fdt_fw_cfg(s); create_fdt_pmu(s); } @@ -1089,23 +1221,21 @@ static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem, dev = qdev_new(TYPE_GPEX_HOST); /* Set GPEX object properties for the virt machine */ - object_property_set_uint(OBJECT(GPEX_HOST(dev)), PCI_HOST_ECAM_BASE, + object_property_set_uint(OBJECT(dev), PCI_HOST_ECAM_BASE, ecam_base, NULL); - object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_ECAM_SIZE, + object_property_set_int(OBJECT(dev), PCI_HOST_ECAM_SIZE, ecam_size, NULL); - object_property_set_uint(OBJECT(GPEX_HOST(dev)), - PCI_HOST_BELOW_4G_MMIO_BASE, + object_property_set_uint(OBJECT(dev), PCI_HOST_BELOW_4G_MMIO_BASE, mmio_base, NULL); - object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_BELOW_4G_MMIO_SIZE, + object_property_set_int(OBJECT(dev), PCI_HOST_BELOW_4G_MMIO_SIZE, mmio_size, NULL); - object_property_set_uint(OBJECT(GPEX_HOST(dev)), - PCI_HOST_ABOVE_4G_MMIO_BASE, + object_property_set_uint(OBJECT(dev), PCI_HOST_ABOVE_4G_MMIO_BASE, high_mmio_base, NULL); - object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_ABOVE_4G_MMIO_SIZE, + object_property_set_int(OBJECT(dev), PCI_HOST_ABOVE_4G_MMIO_SIZE, high_mmio_size, NULL); - object_property_set_uint(OBJECT(GPEX_HOST(dev)), PCI_HOST_PIO_BASE, + object_property_set_uint(OBJECT(dev), PCI_HOST_PIO_BASE, pio_base, NULL); - object_property_set_int(OBJECT(GPEX_HOST(dev)), PCI_HOST_PIO_SIZE, + object_property_set_int(OBJECT(dev), PCI_HOST_PIO_SIZE, pio_size, NULL); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); @@ -1131,20 +1261,19 @@ static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem, sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, pio_base); - for (i = 0; i < GPEX_NUM_IRQS; i++) { + for (i = 0; i < PCI_NUM_PINS; i++) { irq = qdev_get_gpio_in(irqchip, PCIE_IRQ + i); sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, irq); gpex_set_irq_num(GPEX_HOST(dev), i, PCIE_IRQ + i); } - GPEX_HOST(dev)->gpex_cfg.bus = PCI_HOST_BRIDGE(GPEX_HOST(dev))->bus; + GPEX_HOST(dev)->gpex_cfg.bus = PCI_HOST_BRIDGE(dev)->bus; return dev; } -static FWCfgState *create_fw_cfg(const MachineState *ms) +static FWCfgState *create_fw_cfg(const MachineState *ms, hwaddr base) { - hwaddr base = virt_memmap[VIRT_FW_CFG].base; FWCfgState *fw_cfg; fw_cfg = fw_cfg_init_mem_wide(base + 8, base, 8, base + 16, @@ -1157,27 +1286,22 @@ static FWCfgState *create_fw_cfg(const MachineState *ms) static DeviceState *virt_create_plic(const MemMapEntry *memmap, int socket, int base_hartid, int hart_count) { - DeviceState *ret; g_autofree char *plic_hart_config = NULL; /* Per-socket PLIC hart topology configuration string */ plic_hart_config = riscv_plic_hart_config_string(hart_count); /* Per-socket PLIC */ - ret = sifive_plic_create( - memmap[VIRT_PLIC].base + socket * memmap[VIRT_PLIC].size, - plic_hart_config, hart_count, base_hartid, - VIRT_IRQCHIP_NUM_SOURCES, - ((1U << VIRT_IRQCHIP_NUM_PRIO_BITS) - 1), - VIRT_PLIC_PRIORITY_BASE, - VIRT_PLIC_PENDING_BASE, - VIRT_PLIC_ENABLE_BASE, - VIRT_PLIC_ENABLE_STRIDE, - VIRT_PLIC_CONTEXT_BASE, - VIRT_PLIC_CONTEXT_STRIDE, - memmap[VIRT_PLIC].size); - - return ret; + return sifive_plic_create( + memmap[VIRT_PLIC].base + socket * memmap[VIRT_PLIC].size, + plic_hart_config, hart_count, base_hartid, + VIRT_IRQCHIP_NUM_SOURCES, + ((1U << VIRT_IRQCHIP_NUM_PRIO_BITS) - 1), + VIRT_PLIC_PRIORITY_BASE, VIRT_PLIC_PENDING_BASE, + VIRT_PLIC_ENABLE_BASE, VIRT_PLIC_ENABLE_STRIDE, + VIRT_PLIC_CONTEXT_BASE, + VIRT_PLIC_CONTEXT_STRIDE, + memmap[VIRT_PLIC].size); } static DeviceState *virt_create_aia(RISCVVirtAIAType aia_type, int aia_guests, @@ -1185,7 +1309,7 @@ static DeviceState *virt_create_aia(RISCVVirtAIAType aia_type, int aia_guests, int base_hartid, int hart_count) { int i; - hwaddr addr; + hwaddr addr = 0; uint32_t guest_bits; DeviceState *aplic_s = NULL; DeviceState *aplic_m = NULL; @@ -1235,6 +1359,10 @@ static DeviceState *virt_create_aia(RISCVVirtAIAType aia_type, int aia_guests, VIRT_IRQCHIP_NUM_PRIO_BITS, msimode, false, aplic_m); + if (kvm_enabled() && msimode) { + riscv_aplic_set_kvm_msicfgaddr(RISCV_APLIC(aplic_s), addr); + } + return kvm_enabled() ? aplic_s : aplic_m; } @@ -1242,14 +1370,13 @@ static void create_platform_bus(RISCVVirtState *s, DeviceState *irqchip) { DeviceState *dev; SysBusDevice *sysbus; - const MemMapEntry *memmap = virt_memmap; int i; MemoryRegion *sysmem = get_system_memory(); dev = qdev_new(TYPE_PLATFORM_BUS_DEVICE); dev->id = g_strdup(TYPE_PLATFORM_BUS_DEVICE); qdev_prop_set_uint32(dev, "num_irqs", VIRT_PLATFORM_BUS_NUM_IRQS); - qdev_prop_set_uint32(dev, "mmio_size", memmap[VIRT_PLATFORM_BUS].size); + qdev_prop_set_uint32(dev, "mmio_size", s->memmap[VIRT_PLATFORM_BUS].size); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); s->platform_bus_dev = dev; @@ -1260,7 +1387,7 @@ static void create_platform_bus(RISCVVirtState *s, DeviceState *irqchip) } memory_region_add_subregion(sysmem, - memmap[VIRT_PLATFORM_BUS].base, + s->memmap[VIRT_PLATFORM_BUS].base, sysbus_mmio_get_region(sysbus, 0)); } @@ -1307,14 +1434,14 @@ static void virt_machine_done(Notifier *notifier, void *data) { RISCVVirtState *s = container_of(notifier, RISCVVirtState, machine_done); - const MemMapEntry *memmap = virt_memmap; MachineState *machine = MACHINE(s); - target_ulong start_addr = memmap[VIRT_DRAM].base; + hwaddr start_addr = s->memmap[VIRT_DRAM].base; target_ulong firmware_end_addr, kernel_start_addr; const char *firmware_name = riscv_default_firmware_name(&s->soc[0]); uint64_t fdt_load_addr; uint64_t kernel_entry = 0; BlockBackend *pflash_blk0; + RISCVBootInfo boot_info; /* * An user provided dtb must include everything, including @@ -1341,7 +1468,7 @@ static void virt_machine_done(Notifier *notifier, void *data) } firmware_end_addr = riscv_find_and_load_firmware(machine, firmware_name, - start_addr, NULL); + &start_addr, NULL); pflash_blk0 = pflash_cfi01_get_blk(s->flash[0]); if (pflash_blk0) { @@ -1352,34 +1479,36 @@ static void virt_machine_done(Notifier *notifier, void *data) * let's overwrite the address we jump to after reset to * the base of the flash. */ - start_addr = virt_memmap[VIRT_FLASH].base; + start_addr = s->memmap[VIRT_FLASH].base; } else { /* * Pflash was supplied but either KVM guest or bios is not none. * In this case, base of the flash would contain S-mode payload. */ riscv_setup_firmware_boot(machine); - kernel_entry = virt_memmap[VIRT_FLASH].base; + kernel_entry = s->memmap[VIRT_FLASH].base; } } + riscv_boot_info_init(&boot_info, &s->soc[0]); + if (machine->kernel_filename && !kernel_entry) { - kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0], + kernel_start_addr = riscv_calc_kernel_start_addr(&boot_info, firmware_end_addr); - - kernel_entry = riscv_load_kernel(machine, &s->soc[0], - kernel_start_addr, true, NULL); + riscv_load_kernel(machine, &boot_info, kernel_start_addr, + true, NULL); + kernel_entry = boot_info.image_low_addr; } - fdt_load_addr = riscv_compute_fdt_addr(memmap[VIRT_DRAM].base, - memmap[VIRT_DRAM].size, - machine); + fdt_load_addr = riscv_compute_fdt_addr(s->memmap[VIRT_DRAM].base, + s->memmap[VIRT_DRAM].size, + machine, &boot_info); riscv_load_fdt(fdt_load_addr, machine->fdt); /* load the reset vector */ riscv_setup_rom_reset_vec(machine, &s->soc[0], start_addr, - virt_memmap[VIRT_MROM].base, - virt_memmap[VIRT_MROM].size, kernel_entry, + s->memmap[VIRT_MROM].base, + s->memmap[VIRT_MROM].size, kernel_entry, fdt_load_addr); /* @@ -1400,7 +1529,6 @@ static void virt_machine_done(Notifier *notifier, void *data) static void virt_machine_init(MachineState *machine) { - const MemMapEntry *memmap = virt_memmap; RISCVVirtState *s = RISCV_VIRT_MACHINE(machine); MemoryRegion *system_memory = get_system_memory(); MemoryRegion *mask_rom = g_new(MemoryRegion, 1); @@ -1408,6 +1536,8 @@ static void virt_machine_init(MachineState *machine) int i, base_hartid, hart_count; int socket_count = riscv_socket_count(machine); + s->memmap = virt_memmap; + /* Check socket count limit */ if (VIRT_SOCKETS_MAX < socket_count) { error_report("number of sockets/nodes should be less than %d", @@ -1455,7 +1585,7 @@ static void virt_machine_init(MachineState *machine) if (virt_aclint_allowed() && s->have_aclint) { if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) { /* Per-socket ACLINT MTIMER */ - riscv_aclint_mtimer_create(memmap[VIRT_CLINT].base + + riscv_aclint_mtimer_create(s->memmap[VIRT_CLINT].base + i * RISCV_ACLINT_DEFAULT_MTIMER_SIZE, RISCV_ACLINT_DEFAULT_MTIMER_SIZE, base_hartid, hart_count, @@ -1464,28 +1594,28 @@ static void virt_machine_init(MachineState *machine) RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true); } else { /* Per-socket ACLINT MSWI, MTIMER, and SSWI */ - riscv_aclint_swi_create(memmap[VIRT_CLINT].base + - i * memmap[VIRT_CLINT].size, + riscv_aclint_swi_create(s->memmap[VIRT_CLINT].base + + i * s->memmap[VIRT_CLINT].size, base_hartid, hart_count, false); - riscv_aclint_mtimer_create(memmap[VIRT_CLINT].base + - i * memmap[VIRT_CLINT].size + + riscv_aclint_mtimer_create(s->memmap[VIRT_CLINT].base + + i * s->memmap[VIRT_CLINT].size + RISCV_ACLINT_SWI_SIZE, RISCV_ACLINT_DEFAULT_MTIMER_SIZE, base_hartid, hart_count, RISCV_ACLINT_DEFAULT_MTIMECMP, RISCV_ACLINT_DEFAULT_MTIME, RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true); - riscv_aclint_swi_create(memmap[VIRT_ACLINT_SSWI].base + - i * memmap[VIRT_ACLINT_SSWI].size, + riscv_aclint_swi_create(s->memmap[VIRT_ACLINT_SSWI].base + + i * s->memmap[VIRT_ACLINT_SSWI].size, base_hartid, hart_count, true); } } else if (tcg_enabled()) { /* Per-socket SiFive CLINT */ riscv_aclint_swi_create( - memmap[VIRT_CLINT].base + i * memmap[VIRT_CLINT].size, + s->memmap[VIRT_CLINT].base + i * s->memmap[VIRT_CLINT].size, base_hartid, hart_count, false); - riscv_aclint_mtimer_create(memmap[VIRT_CLINT].base + - i * memmap[VIRT_CLINT].size + RISCV_ACLINT_SWI_SIZE, + riscv_aclint_mtimer_create(s->memmap[VIRT_CLINT].base + + i * s->memmap[VIRT_CLINT].size + RISCV_ACLINT_SWI_SIZE, RISCV_ACLINT_DEFAULT_MTIMER_SIZE, base_hartid, hart_count, RISCV_ACLINT_DEFAULT_MTIMECMP, RISCV_ACLINT_DEFAULT_MTIME, RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true); @@ -1493,11 +1623,11 @@ static void virt_machine_init(MachineState *machine) /* Per-socket interrupt controller */ if (s->aia_type == VIRT_AIA_TYPE_NONE) { - s->irqchip[i] = virt_create_plic(memmap, i, + s->irqchip[i] = virt_create_plic(s->memmap, i, base_hartid, hart_count); } else { s->irqchip[i] = virt_create_aia(s->aia_type, s->aia_guests, - memmap, i, base_hartid, + s->memmap, i, base_hartid, hart_count); } @@ -1516,11 +1646,11 @@ static void virt_machine_init(MachineState *machine) } } - if (kvm_enabled() && virt_use_kvm_aia(s)) { + if (kvm_enabled() && virt_use_kvm_aia_aplic_imsic(s->aia_type)) { kvm_riscv_aia_create(machine, IMSIC_MMIO_GROUP_MIN_SHIFT, VIRT_IRQCHIP_NUM_SOURCES, VIRT_IRQCHIP_NUM_MSIS, - memmap[VIRT_APLIC_S].base, - memmap[VIRT_IMSIC_S].base, + s->memmap[VIRT_APLIC_S].base, + s->memmap[VIRT_IMSIC_S].base, s->aia_guests); } @@ -1536,37 +1666,36 @@ static void virt_machine_init(MachineState *machine) virt_high_pcie_memmap.size = VIRT32_HIGH_PCIE_MMIO_SIZE; } else { virt_high_pcie_memmap.size = VIRT64_HIGH_PCIE_MMIO_SIZE; - virt_high_pcie_memmap.base = memmap[VIRT_DRAM].base + machine->ram_size; + virt_high_pcie_memmap.base = s->memmap[VIRT_DRAM].base + + machine->ram_size; virt_high_pcie_memmap.base = ROUND_UP(virt_high_pcie_memmap.base, virt_high_pcie_memmap.size); } - s->memmap = virt_memmap; - /* register system main memory (actual RAM) */ - memory_region_add_subregion(system_memory, memmap[VIRT_DRAM].base, - machine->ram); + memory_region_add_subregion(system_memory, s->memmap[VIRT_DRAM].base, + machine->ram); /* boot rom */ memory_region_init_rom(mask_rom, NULL, "riscv_virt_board.mrom", - memmap[VIRT_MROM].size, &error_fatal); - memory_region_add_subregion(system_memory, memmap[VIRT_MROM].base, + s->memmap[VIRT_MROM].size, &error_fatal); + memory_region_add_subregion(system_memory, s->memmap[VIRT_MROM].base, mask_rom); /* * Init fw_cfg. Must be done before riscv_load_fdt, otherwise the * device tree cannot be altered and we get FDT_ERR_NOSPACE. */ - s->fw_cfg = create_fw_cfg(machine); + s->fw_cfg = create_fw_cfg(machine, s->memmap[VIRT_FW_CFG].base); rom_set_fw(s->fw_cfg); /* SiFive Test MMIO device */ - sifive_test_create(memmap[VIRT_TEST].base); + sifive_test_create(s->memmap[VIRT_TEST].base); /* VirtIO MMIO devices */ for (i = 0; i < VIRTIO_COUNT; i++) { sysbus_create_simple("virtio-mmio", - memmap[VIRT_VIRTIO].base + i * memmap[VIRT_VIRTIO].size, + s->memmap[VIRT_VIRTIO].base + i * s->memmap[VIRT_VIRTIO].size, qdev_get_gpio_in(virtio_irqchip, VIRTIO_IRQ + i)); } @@ -1574,11 +1703,11 @@ static void virt_machine_init(MachineState *machine) create_platform_bus(s, mmio_irqchip); - serial_mm_init(system_memory, memmap[VIRT_UART0].base, + serial_mm_init(system_memory, s->memmap[VIRT_UART0].base, 0, qdev_get_gpio_in(mmio_irqchip, UART0_IRQ), 399193, serial_hd(0), DEVICE_LITTLE_ENDIAN); - sysbus_create_simple("goldfish_rtc", memmap[VIRT_RTC].base, + sysbus_create_simple("goldfish_rtc", s->memmap[VIRT_RTC].base, qdev_get_gpio_in(mmio_irqchip, RTC_IRQ)); for (i = 0; i < ARRAY_SIZE(s->flash); i++) { @@ -1596,7 +1725,23 @@ static void virt_machine_init(MachineState *machine) exit(1); } } else { - create_fdt(s, memmap); + create_fdt(s); + } + + if (virt_is_iommu_sys_enabled(s)) { + DeviceState *iommu_sys = qdev_new(TYPE_RISCV_IOMMU_SYS); + + object_property_set_uint(OBJECT(iommu_sys), "addr", + s->memmap[VIRT_IOMMU_SYS].base, + &error_fatal); + object_property_set_uint(OBJECT(iommu_sys), "base-irq", + IOMMU_SYS_IRQ, + &error_fatal); + object_property_set_link(OBJECT(iommu_sys), "irqchip", + OBJECT(mmio_irqchip), + &error_fatal); + + sysbus_realize_and_unref(SYS_BUS_DEVICE(iommu_sys), &error_fatal); } s->machine_done.notify = virt_machine_done; @@ -1612,6 +1757,7 @@ static void virt_machine_instance_init(Object *obj) s->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); s->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); s->acpi = ON_OFF_AUTO_AUTO; + s->iommu_sys = ON_OFF_AUTO_AUTO; } static char *virt_get_aia_guests(Object *obj, Error **errp) @@ -1684,6 +1830,28 @@ static void virt_set_aclint(Object *obj, bool value, Error **errp) s->have_aclint = value; } +bool virt_is_iommu_sys_enabled(RISCVVirtState *s) +{ + return s->iommu_sys == ON_OFF_AUTO_ON; +} + +static void virt_get_iommu_sys(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + RISCVVirtState *s = RISCV_VIRT_MACHINE(obj); + OnOffAuto iommu_sys = s->iommu_sys; + + visit_type_OnOffAuto(v, name, &iommu_sys, errp); +} + +static void virt_set_iommu_sys(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + RISCVVirtState *s = RISCV_VIRT_MACHINE(obj); + + visit_type_OnOffAuto(v, name, &s->iommu_sys, errp); +} + bool virt_is_acpi_enabled(RISCVVirtState *s) { return s->acpi != ON_OFF_AUTO_OFF; @@ -1710,11 +1878,15 @@ static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine, DeviceState *dev) { MachineClass *mc = MACHINE_GET_CLASS(machine); + RISCVVirtState *s = RISCV_VIRT_MACHINE(machine); if (device_is_dynamic_sysbus(mc, dev) || - object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) { + object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI) || + object_dynamic_cast(OBJECT(dev), TYPE_RISCV_IOMMU_PCI)) { + s->iommu_sys = ON_OFF_AUTO_OFF; return HOTPLUG_HANDLER(machine); } + return NULL; } @@ -1735,9 +1907,14 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev, if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) { create_fdt_virtio_iommu(s, pci_get_bdf(PCI_DEVICE(dev))); } + + if (object_dynamic_cast(OBJECT(dev), TYPE_RISCV_IOMMU_PCI)) { + create_fdt_iommu(s, pci_get_bdf(PCI_DEVICE(dev))); + s->iommu_sys = ON_OFF_AUTO_OFF; + } } -static void virt_machine_class_init(ObjectClass *oc, void *data) +static void virt_machine_class_init(ObjectClass *oc, const void *data) { MachineClass *mc = MACHINE_CLASS(oc); HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); @@ -1746,6 +1923,8 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) mc->init = virt_machine_init; mc->max_cpus = VIRT_CPUS_MAX; mc->default_cpu_type = TYPE_RISCV_CPU_BASE; + mc->block_default_type = IF_VIRTIO; + mc->no_cdrom = 1; mc->pci_allow_0_address = true; mc->possible_cpu_arch_ids = riscv_numa_possible_cpu_arch_ids; mc->cpu_index_to_instance_props = riscv_numa_cpu_index_to_props; @@ -1760,6 +1939,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) hc->plug = virt_machine_device_plug_cb; machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE); + machine_class_allow_dynamic_sysbus_dev(mc, TYPE_UEFI_VARS_SYSBUS); #ifdef CONFIG_TPM machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS); #endif @@ -1794,6 +1974,12 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) NULL, NULL); object_class_property_set_description(oc, "acpi", "Enable ACPI"); + + object_class_property_add(oc, "iommu-sys", "OnOffAuto", + virt_get_iommu_sys, virt_set_iommu_sys, + NULL, NULL); + object_class_property_set_description(oc, "iommu-sys", + "Enable IOMMU platform device"); } static const TypeInfo virt_machine_typeinfo = { @@ -1802,7 +1988,7 @@ static const TypeInfo virt_machine_typeinfo = { .class_init = virt_machine_class_init, .instance_init = virt_machine_instance_init, .instance_size = sizeof(RISCVVirtState), - .interfaces = (InterfaceInfo[]) { + .interfaces = (const InterfaceInfo[]) { { TYPE_HOTPLUG_HANDLER }, { } }, |