diff options
40 files changed, 2093 insertions, 686 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index f144b5a..19f67dc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2198,6 +2198,8 @@ M: Zhenzhong Duan <zhenzhong.duan@intel.com> S: Supported F: backends/iommufd.c F: include/sysemu/iommufd.h +F: backends/host_iommu_device.c +F: include/sysemu/host_iommu_device.h F: include/qemu/chardev_open.h F: util/chardev_open.c F: docs/devel/vfio-iommufd.rst diff --git a/backends/host_iommu_device.c b/backends/host_iommu_device.c new file mode 100644 index 0000000..8f2dda1 --- /dev/null +++ b/backends/host_iommu_device.c @@ -0,0 +1,33 @@ +/* + * Host IOMMU device abstract + * + * Copyright (C) 2024 Intel Corporation. + * + * Authors: Zhenzhong Duan <zhenzhong.duan@intel.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "sysemu/host_iommu_device.h" + +OBJECT_DEFINE_ABSTRACT_TYPE(HostIOMMUDevice, + host_iommu_device, + HOST_IOMMU_DEVICE, + OBJECT) + +static void host_iommu_device_class_init(ObjectClass *oc, void *data) +{ +} + +static void host_iommu_device_init(Object *obj) +{ +} + +static void host_iommu_device_finalize(Object *obj) +{ + HostIOMMUDevice *hiod = HOST_IOMMU_DEVICE(obj); + + g_free(hiod->name); +} diff --git a/backends/iommufd.c b/backends/iommufd.c index c506afb..84fefbc 100644 --- a/backends/iommufd.c +++ b/backends/iommufd.c @@ -208,23 +208,69 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, return ret; } -static const TypeInfo iommufd_backend_info = { - .name = TYPE_IOMMUFD_BACKEND, - .parent = TYPE_OBJECT, - .instance_size = sizeof(IOMMUFDBackend), - .instance_init = iommufd_backend_init, - .instance_finalize = iommufd_backend_finalize, - .class_size = sizeof(IOMMUFDBackendClass), - .class_init = iommufd_backend_class_init, - .interfaces = (InterfaceInfo[]) { - { TYPE_USER_CREATABLE }, - { } +bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid, + uint32_t *type, void *data, uint32_t len, + Error **errp) +{ + struct iommu_hw_info info = { + .size = sizeof(info), + .dev_id = devid, + .data_len = len, + .data_uptr = (uintptr_t)data, + }; + + if (ioctl(be->fd, IOMMU_GET_HW_INFO, &info)) { + error_setg_errno(errp, errno, "Failed to get hardware info"); + return false; } -}; -static void register_types(void) + g_assert(type); + *type = info.out_data_type; + + return true; +} + +static int hiod_iommufd_get_cap(HostIOMMUDevice *hiod, int cap, Error **errp) { - type_register_static(&iommufd_backend_info); + HostIOMMUDeviceCaps *caps = &hiod->caps; + + switch (cap) { + case HOST_IOMMU_DEVICE_CAP_IOMMU_TYPE: + return caps->type; + case HOST_IOMMU_DEVICE_CAP_AW_BITS: + return caps->aw_bits; + default: + error_setg(errp, "%s: unsupported capability %x", hiod->name, cap); + return -EINVAL; + } } -type_init(register_types); +static void hiod_iommufd_class_init(ObjectClass *oc, void *data) +{ + HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc); + + hioc->get_cap = hiod_iommufd_get_cap; +}; + +static const TypeInfo types[] = { + { + .name = TYPE_IOMMUFD_BACKEND, + .parent = TYPE_OBJECT, + .instance_size = sizeof(IOMMUFDBackend), + .instance_init = iommufd_backend_init, + .instance_finalize = iommufd_backend_finalize, + .class_size = sizeof(IOMMUFDBackendClass), + .class_init = iommufd_backend_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } + }, { + .name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD, + .parent = TYPE_HOST_IOMMU_DEVICE, + .class_init = hiod_iommufd_class_init, + .abstract = true, + } +}; + +DEFINE_TYPES(types) diff --git a/backends/meson.build b/backends/meson.build index 8b2b111..106312f 100644 --- a/backends/meson.build +++ b/backends/meson.build @@ -16,6 +16,7 @@ if host_os != 'windows' endif if host_os == 'linux' system_ss.add(files('hostmem-memfd.c')) + system_ss.add(files('host_iommu_device.c')) endif if keyutils.found() system_ss.add(keyutils, files('cryptodev-lkcf.c')) diff --git a/configs/targets/riscv64-softmmu.mak b/configs/targets/riscv64-softmmu.mak index f688ffa..917980e 100644 --- a/configs/targets/riscv64-softmmu.mak +++ b/configs/targets/riscv64-softmmu.mak @@ -1,6 +1,7 @@ TARGET_ARCH=riscv64 TARGET_BASE_ARCH=riscv TARGET_SUPPORTS_MTTCG=y +TARGET_KVM_HAVE_GUEST_DEBUG=y TARGET_XML_FILES= gdb-xml/riscv-64bit-cpu.xml gdb-xml/riscv-32bit-fpu.xml gdb-xml/riscv-64bit-fpu.xml gdb-xml/riscv-64bit-virtual.xml # needed by boot.c TARGET_NEED_FDT=y diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index c4350e0..37c21a0a 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -61,6 +61,12 @@ struct vtd_as_key { uint32_t pasid; }; +/* bus/devfn is PCI device's real BDF not the aliased one */ +struct vtd_hiod_key { + PCIBus *bus; + uint8_t devfn; +}; + struct vtd_iotlb_key { uint64_t gfn; uint32_t pasid; @@ -250,6 +256,25 @@ static guint vtd_as_hash(gconstpointer v) return (guint)(value << 8 | key->devfn); } +/* Same implementation as vtd_as_hash() */ +static guint vtd_hiod_hash(gconstpointer v) +{ + return vtd_as_hash(v); +} + +static gboolean vtd_hiod_equal(gconstpointer v1, gconstpointer v2) +{ + const struct vtd_hiod_key *key1 = v1; + const struct vtd_hiod_key *key2 = v2; + + return (key1->bus == key2->bus) && (key1->devfn == key2->devfn); +} + +static void vtd_hiod_destroy(gpointer v) +{ + object_unref(v); +} + static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value, gpointer user_data) { @@ -3812,6 +3837,87 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, return vtd_dev_as; } +static bool vtd_check_hiod(IntelIOMMUState *s, HostIOMMUDevice *hiod, + Error **errp) +{ + HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod); + int ret; + + if (!hiodc->get_cap) { + error_setg(errp, ".get_cap() not implemented"); + return false; + } + + /* Common checks */ + ret = hiodc->get_cap(hiod, HOST_IOMMU_DEVICE_CAP_AW_BITS, errp); + if (ret < 0) { + return false; + } + if (s->aw_bits > ret) { + error_setg(errp, "aw-bits %d > host aw-bits %d", s->aw_bits, ret); + return false; + } + + return true; +} + +static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn, + HostIOMMUDevice *hiod, Error **errp) +{ + IntelIOMMUState *s = opaque; + struct vtd_as_key key = { + .bus = bus, + .devfn = devfn, + }; + struct vtd_as_key *new_key; + + assert(hiod); + + vtd_iommu_lock(s); + + if (g_hash_table_lookup(s->vtd_host_iommu_dev, &key)) { + error_setg(errp, "Host IOMMU device already exist"); + vtd_iommu_unlock(s); + return false; + } + + if (!vtd_check_hiod(s, hiod, errp)) { + vtd_iommu_unlock(s); + return false; + } + + new_key = g_malloc(sizeof(*new_key)); + new_key->bus = bus; + new_key->devfn = devfn; + + object_ref(hiod); + g_hash_table_insert(s->vtd_host_iommu_dev, new_key, hiod); + + vtd_iommu_unlock(s); + + return true; +} + +static void vtd_dev_unset_iommu_device(PCIBus *bus, void *opaque, int devfn) +{ + IntelIOMMUState *s = opaque; + struct vtd_as_key key = { + .bus = bus, + .devfn = devfn, + }; + + vtd_iommu_lock(s); + + if (!g_hash_table_lookup(s->vtd_host_iommu_dev, &key)) { + vtd_iommu_unlock(s); + return; + } + + g_hash_table_remove(s->vtd_host_iommu_dev, &key); + + vtd_iommu_unlock(s); +} + /* Unmap the whole range in the notifier's scope. */ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) { @@ -3934,30 +4040,10 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n) return; } -/* Do the initialization. It will also be called when reset, so pay - * attention when adding new initialization stuff. - */ -static void vtd_init(IntelIOMMUState *s) +static void vtd_cap_init(IntelIOMMUState *s) { X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); - memset(s->csr, 0, DMAR_REG_SIZE); - memset(s->wmask, 0, DMAR_REG_SIZE); - memset(s->w1cmask, 0, DMAR_REG_SIZE); - memset(s->womask, 0, DMAR_REG_SIZE); - - s->root = 0; - s->root_scalable = false; - s->dmar_enabled = false; - s->intr_enabled = false; - s->iq_head = 0; - s->iq_tail = 0; - s->iq = 0; - s->iq_size = 0; - s->qi_enabled = false; - s->iq_last_desc_type = VTD_INV_DESC_NONE; - s->iq_dw = false; - s->next_frcd_reg = 0; s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS | VTD_CAP_MGAW(s->aw_bits); @@ -3974,27 +4060,6 @@ static void vtd_init(IntelIOMMUState *s) } s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO; - /* - * Rsvd field masks for spte - */ - vtd_spte_rsvd[0] = ~0ULL; - vtd_spte_rsvd[1] = VTD_SPTE_PAGE_L1_RSVD_MASK(s->aw_bits, - x86_iommu->dt_supported); - vtd_spte_rsvd[2] = VTD_SPTE_PAGE_L2_RSVD_MASK(s->aw_bits); - vtd_spte_rsvd[3] = VTD_SPTE_PAGE_L3_RSVD_MASK(s->aw_bits); - vtd_spte_rsvd[4] = VTD_SPTE_PAGE_L4_RSVD_MASK(s->aw_bits); - - vtd_spte_rsvd_large[2] = VTD_SPTE_LPAGE_L2_RSVD_MASK(s->aw_bits, - x86_iommu->dt_supported); - vtd_spte_rsvd_large[3] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits, - x86_iommu->dt_supported); - - if (s->scalable_mode || s->snoop_control) { - vtd_spte_rsvd[1] &= ~VTD_SPTE_SNP; - vtd_spte_rsvd_large[2] &= ~VTD_SPTE_SNP; - vtd_spte_rsvd_large[3] &= ~VTD_SPTE_SNP; - } - if (x86_iommu_ir_supported(x86_iommu)) { s->ecap |= VTD_ECAP_IR | VTD_ECAP_MHMV; if (s->intr_eim == ON_OFF_AUTO_ON) { @@ -4027,6 +4092,56 @@ static void vtd_init(IntelIOMMUState *s) if (s->pasid) { s->ecap |= VTD_ECAP_PASID; } +} + +/* + * Do the initialization. It will also be called when reset, so pay + * attention when adding new initialization stuff. + */ +static void vtd_init(IntelIOMMUState *s) +{ + X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); + + memset(s->csr, 0, DMAR_REG_SIZE); + memset(s->wmask, 0, DMAR_REG_SIZE); + memset(s->w1cmask, 0, DMAR_REG_SIZE); + memset(s->womask, 0, DMAR_REG_SIZE); + + s->root = 0; + s->root_scalable = false; + s->dmar_enabled = false; + s->intr_enabled = false; + s->iq_head = 0; + s->iq_tail = 0; + s->iq = 0; + s->iq_size = 0; + s->qi_enabled = false; + s->iq_last_desc_type = VTD_INV_DESC_NONE; + s->iq_dw = false; + s->next_frcd_reg = 0; + + vtd_cap_init(s); + + /* + * Rsvd field masks for spte + */ + vtd_spte_rsvd[0] = ~0ULL; + vtd_spte_rsvd[1] = VTD_SPTE_PAGE_L1_RSVD_MASK(s->aw_bits, + x86_iommu->dt_supported); + vtd_spte_rsvd[2] = VTD_SPTE_PAGE_L2_RSVD_MASK(s->aw_bits); + vtd_spte_rsvd[3] = VTD_SPTE_PAGE_L3_RSVD_MASK(s->aw_bits); + vtd_spte_rsvd[4] = VTD_SPTE_PAGE_L4_RSVD_MASK(s->aw_bits); + + vtd_spte_rsvd_large[2] = VTD_SPTE_LPAGE_L2_RSVD_MASK(s->aw_bits, + x86_iommu->dt_supported); + vtd_spte_rsvd_large[3] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits, + x86_iommu->dt_supported); + + if (s->scalable_mode || s->snoop_control) { + vtd_spte_rsvd[1] &= ~VTD_SPTE_SNP; + vtd_spte_rsvd_large[2] &= ~VTD_SPTE_SNP; + vtd_spte_rsvd_large[3] &= ~VTD_SPTE_SNP; + } vtd_reset_caches(s); @@ -4107,6 +4222,8 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) static PCIIOMMUOps vtd_iommu_ops = { .get_address_space = vtd_host_dma_iommu, + .set_iommu_device = vtd_dev_set_iommu_device, + .unset_iommu_device = vtd_dev_unset_iommu_device, }; static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) @@ -4226,6 +4343,8 @@ static void vtd_realize(DeviceState *dev, Error **errp) g_free, g_free); s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, vtd_as_equal, g_free, g_free); + s->vtd_host_iommu_dev = g_hash_table_new_full(vtd_hiod_hash, vtd_hiod_equal, + g_free, vtd_hiod_destroy); vtd_init(s); pci_setup_iommu(bus, &vtd_iommu_ops, dev); /* Pseudo address space under root PCI bus. */ diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 324c130..50b86d5 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -2648,11 +2648,27 @@ static void pci_device_class_base_init(ObjectClass *klass, void *data) } } -AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) +/* + * Get IOMMU root bus, aliased bus and devfn of a PCI device + * + * IOMMU root bus is needed by all call sites to call into iommu_ops. + * For call sites which don't need aliased BDF, passing NULL to + * aliased_[bus|devfn] is allowed. + * + * @piommu_bus: return root #PCIBus backed by an IOMMU for the PCI device. + * + * @aliased_bus: return aliased #PCIBus of the PCI device, optional. + * + * @aliased_devfn: return aliased devfn of the PCI device, optional. + */ +static void pci_device_get_iommu_bus_devfn(PCIDevice *dev, + PCIBus **piommu_bus, + PCIBus **aliased_bus, + int *aliased_devfn) { PCIBus *bus = pci_get_bus(dev); PCIBus *iommu_bus = bus; - uint8_t devfn = dev->devfn; + int devfn = dev->devfn; while (iommu_bus && !iommu_bus->iommu_ops && iommu_bus->parent_dev) { PCIBus *parent_bus = pci_get_bus(iommu_bus->parent_dev); @@ -2693,13 +2709,70 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) iommu_bus = parent_bus; } - if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) { + + assert(0 <= devfn && devfn < PCI_DEVFN_MAX); + assert(iommu_bus); + + if (pci_bus_bypass_iommu(bus) || !iommu_bus->iommu_ops) { + iommu_bus = NULL; + } + + *piommu_bus = iommu_bus; + + if (aliased_bus) { + *aliased_bus = bus; + } + + if (aliased_devfn) { + *aliased_devfn = devfn; + } +} + +AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + pci_device_get_iommu_bus_devfn(dev, &iommu_bus, &bus, &devfn); + if (iommu_bus) { return iommu_bus->iommu_ops->get_address_space(bus, iommu_bus->iommu_opaque, devfn); } return &address_space_memory; } +bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod, + Error **errp) +{ + PCIBus *iommu_bus, *aliased_bus; + int aliased_devfn; + + /* set_iommu_device requires device's direct BDF instead of aliased BDF */ + pci_device_get_iommu_bus_devfn(dev, &iommu_bus, + &aliased_bus, &aliased_devfn); + if (iommu_bus && iommu_bus->iommu_ops->set_iommu_device) { + hiod->aliased_bus = aliased_bus; + hiod->aliased_devfn = aliased_devfn; + return iommu_bus->iommu_ops->set_iommu_device(pci_get_bus(dev), + iommu_bus->iommu_opaque, + dev->devfn, hiod, errp); + } + return true; +} + +void pci_device_unset_iommu_device(PCIDevice *dev) +{ + PCIBus *iommu_bus; + + pci_device_get_iommu_bus_devfn(dev, &iommu_bus, NULL, NULL); + if (iommu_bus && iommu_bus->iommu_ops->unset_iommu_device) { + return iommu_bus->iommu_ops->unset_iommu_device(pci_get_bus(dev), + iommu_bus->iommu_opaque, + dev->devfn); + } +} + void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque) { /* diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index 5676d66..bc0893e 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -515,6 +515,9 @@ static void create_fdt_one_imsic(RISCVVirtState *s, hwaddr base_addr, uint32_t imsic_max_hart_per_socket, imsic_addr, imsic_size; g_autofree uint32_t *imsic_cells = NULL; g_autofree uint32_t *imsic_regs = NULL; + static const char * const imsic_compat[2] = { + "qemu,imsics", "riscv,imsics" + }; imsic_cells = g_new0(uint32_t, ms->smp.cpus * 2); imsic_regs = g_new0(uint32_t, socket_count * 4); @@ -538,13 +541,18 @@ static void create_fdt_one_imsic(RISCVVirtState *s, hwaddr base_addr, } } - imsic_name = g_strdup_printf("/soc/imsics@%lx", (unsigned long)base_addr); + imsic_name = g_strdup_printf("/soc/interrupt-controller@%lx", + (unsigned long)base_addr); qemu_fdt_add_subnode(ms->fdt, imsic_name); - qemu_fdt_setprop_string(ms->fdt, imsic_name, "compatible", "riscv,imsics"); + qemu_fdt_setprop_string_array(ms->fdt, imsic_name, "compatible", + (char **)&imsic_compat, + ARRAY_SIZE(imsic_compat)); + qemu_fdt_setprop_cell(ms->fdt, imsic_name, "#interrupt-cells", FDT_IMSIC_INT_CELLS); qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller", NULL, 0); qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller", NULL, 0); + qemu_fdt_setprop_cell(ms->fdt, imsic_name, "#msi-cells", 0); qemu_fdt_setprop(ms->fdt, imsic_name, "interrupts-extended", imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2); qemu_fdt_setprop(ms->fdt, imsic_name, "reg", imsic_regs, @@ -588,6 +596,12 @@ static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap, } +/* Caller must free string after use */ +static char *fdt_get_aplic_nodename(unsigned long aplic_addr) +{ + return g_strdup_printf("/soc/interrupt-controller@%lx", aplic_addr); +} + static void create_fdt_one_aplic(RISCVVirtState *s, int socket, unsigned long aplic_addr, uint32_t aplic_size, uint32_t msi_phandle, @@ -597,18 +611,24 @@ static void create_fdt_one_aplic(RISCVVirtState *s, int socket, bool m_mode, int num_harts) { int cpu; - g_autofree char *aplic_name = NULL; + g_autofree char *aplic_name = fdt_get_aplic_nodename(aplic_addr); g_autofree uint32_t *aplic_cells = g_new0(uint32_t, num_harts * 2); MachineState *ms = MACHINE(s); + static const char * const aplic_compat[2] = { + "qemu,aplic", "riscv,aplic" + }; for (cpu = 0; cpu < num_harts; cpu++) { aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); aplic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT : IRQ_S_EXT); } - aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr); qemu_fdt_add_subnode(ms->fdt, aplic_name); - qemu_fdt_setprop_string(ms->fdt, aplic_name, "compatible", "riscv,aplic"); + qemu_fdt_setprop_string_array(ms->fdt, aplic_name, "compatible", + (char **)&aplic_compat, + ARRAY_SIZE(aplic_compat)); + qemu_fdt_setprop_cell(ms->fdt, aplic_name, "#address-cells", + FDT_APLIC_ADDR_CELLS); qemu_fdt_setprop_cell(ms->fdt, aplic_name, "#interrupt-cells", FDT_APLIC_INT_CELLS); qemu_fdt_setprop(ms->fdt, aplic_name, "interrupt-controller", NULL, 0); @@ -628,7 +648,7 @@ static void create_fdt_one_aplic(RISCVVirtState *s, int socket, if (aplic_child_phandle) { qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,children", aplic_child_phandle); - qemu_fdt_setprop_cells(ms->fdt, aplic_name, "riscv,delegate", + qemu_fdt_setprop_cells(ms->fdt, aplic_name, "riscv,delegation", aplic_child_phandle, 0x1, VIRT_IRQCHIP_NUM_SOURCES); } @@ -646,7 +666,6 @@ static void create_fdt_socket_aplic(RISCVVirtState *s, uint32_t *aplic_phandles, int num_harts) { - g_autofree char *aplic_name = NULL; unsigned long aplic_addr; MachineState *ms = MACHINE(s); uint32_t aplic_m_phandle, aplic_s_phandle; @@ -672,9 +691,8 @@ static void create_fdt_socket_aplic(RISCVVirtState *s, aplic_s_phandle, 0, false, num_harts); - aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr); - if (!socket) { + g_autofree char *aplic_name = fdt_get_aplic_nodename(aplic_addr); platform_bus_add_all_fdt_nodes(ms->fdt, aplic_name, memmap[VIRT_PLATFORM_BUS].base, memmap[VIRT_PLATFORM_BUS].size, @@ -1746,6 +1764,8 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) mc->init = virt_machine_init; mc->max_cpus = VIRT_CPUS_MAX; mc->default_cpu_type = TYPE_RISCV_CPU_BASE; + mc->block_default_type = IF_VIRTIO; + mc->no_cdrom = 1; mc->pci_allow_0_address = true; mc->possible_cpu_arch_ids = riscv_numa_possible_cpu_arch_ids; mc->cpu_index_to_instance_props = riscv_numa_cpu_index_to_props; @@ -178,6 +178,17 @@ static const char *sd_version_str(enum SDPhySpecificationVersion version) return sdphy_version[version]; } +static const char *sd_mode_name(enum SDCardModes mode) +{ + static const char *mode_name[] = { + [sd_inactive] = "inactive", + [sd_card_identification_mode] = "identification", + [sd_data_transfer_mode] = "transfer", + }; + assert(mode < ARRAY_SIZE(mode_name)); + return mode_name[mode]; +} + static const char *sd_state_name(enum SDCardStates state) { static const char *state_name[] = { @@ -304,6 +315,8 @@ static uint8_t sd_crc7(const void *message, size_t width) return shift_reg; } +/* Operation Conditions register */ + #define OCR_POWER_DELAY_NS 500000 /* 0.5ms */ FIELD(OCR, VDD_VOLTAGE_WINDOW, 0, 24) @@ -353,6 +366,8 @@ static void sd_set_ocr(SDState *sd) } } +/* SD Configuration register */ + static void sd_set_scr(SDState *sd) { sd->scr[0] = 0 << 4; /* SCR structure version 1.0 */ @@ -375,6 +390,8 @@ static void sd_set_scr(SDState *sd) sd->scr[7] = 0x00; } +/* Card IDentification register */ + #define MID 0xaa #define OID "XY" #define PNM "QEMU!" @@ -393,16 +410,15 @@ static void sd_set_cid(SDState *sd) sd->cid[6] = PNM[3]; sd->cid[7] = PNM[4]; sd->cid[8] = PRV; /* Fake product revision (PRV) */ - sd->cid[9] = 0xde; /* Fake serial number (PSN) */ - sd->cid[10] = 0xad; - sd->cid[11] = 0xbe; - sd->cid[12] = 0xef; + stl_be_p(&sd->cid[9], 0xdeadbeef); /* Fake serial number (PSN) */ sd->cid[13] = 0x00 | /* Manufacture date (MDT) */ ((MDT_YR - 2000) / 10); sd->cid[14] = ((MDT_YR % 10) << 4) | MDT_MON; sd->cid[15] = (sd_crc7(sd->cid, 15) << 1) | 1; } +/* Card-Specific Data register */ + #define HWBLOCK_SHIFT 9 /* 512 bytes */ #define SECTOR_SHIFT 5 /* 16 kilobytes */ #define WPGROUP_SHIFT 7 /* 2 megs */ @@ -462,9 +478,7 @@ static void sd_set_csd(SDState *sd, uint64_t size) sd->csd[4] = 0x5b; sd->csd[5] = 0x59; sd->csd[6] = 0x00; - sd->csd[7] = (size >> 16) & 0xff; - sd->csd[8] = (size >> 8) & 0xff; - sd->csd[9] = (size & 0xff); + st24_be_p(&sd->csd[7], size); sd->csd[10] = 0x7f; sd->csd[11] = 0x80; sd->csd[12] = 0x0a; @@ -474,11 +488,23 @@ static void sd_set_csd(SDState *sd, uint64_t size) sd->csd[15] = (sd_crc7(sd->csd, 15) << 1) | 1; } +/* Relative Card Address register */ + static void sd_set_rca(SDState *sd) { sd->rca += 0x4567; } +static uint16_t sd_req_get_rca(SDState *s, SDRequest req) +{ + if (sd_cmd_type[req.cmd] == sd_ac || sd_cmd_type[req.cmd] == sd_adtc) { + return req.arg >> 16; + } + return 0; +} + +/* Card Status register */ + FIELD(CSR, AKE_SEQ_ERROR, 3, 1) FIELD(CSR, APP_CMD, 5, 1) FIELD(CSR, FX_EVENT, 6, 1) @@ -579,6 +605,14 @@ static void sd_response_r7_make(SDState *sd, uint8_t *response) stl_be_p(response, sd->vhs); } +static uint64_t sd_req_get_address(SDState *sd, SDRequest req) +{ + if (FIELD_EX32(sd->ocr, OCR, CARD_CAPACITY)) { + return (uint64_t) req.arg << HWBLOCK_SHIFT; + } + return req.arg; +} + static inline uint64_t sd_addr_to_wpnum(uint64_t addr) { return addr >> (HWBLOCK_SHIFT + SECTOR_SHIFT + WPGROUP_SHIFT); @@ -596,11 +630,13 @@ static void sd_reset(DeviceState *dev) } else { sect = 0; } - size = sect << 9; + size = sect << HWBLOCK_SHIFT; sect = sd_addr_to_wpnum(size) + 1; sd->state = sd_idle_state; + + /* card registers */ sd->rca = 0x0000; sd->size = size; sd_set_ocr(sd); @@ -797,8 +833,6 @@ static void sd_blk_write(SDState *sd, uint64_t addr, uint32_t len) } } -#define BLK_READ_BLOCK(a, len) sd_blk_read(sd, a, len) -#define BLK_WRITE_BLOCK(a, len) sd_blk_write(sd, a, len) #define APP_READ_BLOCK(a, len) memset(sd->data, 0xec, len) #define APP_WRITE_BLOCK(a, len) @@ -822,8 +856,8 @@ static void sd_erase(SDState *sd) if (FIELD_EX32(sd->ocr, OCR, CARD_CAPACITY)) { /* High capacity memory card: erase units are 512 byte blocks */ - erase_start *= 512; - erase_end *= 512; + erase_start <<= HWBLOCK_SHIFT; + erase_end <<= HWBLOCK_SHIFT; sdsc = false; } @@ -850,7 +884,7 @@ static void sd_erase(SDState *sd) continue; } } - BLK_WRITE_BLOCK(erase_addr, erase_len); + sd_blk_write(sd, erase_addr, erase_len); } } @@ -1007,6 +1041,15 @@ static sd_rsp_type_t sd_invalid_state_for_cmd(SDState *sd, SDRequest req) return sd_illegal; } +static sd_rsp_type_t sd_invalid_mode_for_cmd(SDState *sd, SDRequest req) +{ + qemu_log_mask(LOG_GUEST_ERROR, "%s: CMD%i in a wrong mode: %s (spec %s)\n", + sd_proto(sd)->name, req.cmd, sd_mode_name(sd->mode), + sd_version_str(sd->spec_version)); + + return sd_illegal; +} + static sd_rsp_type_t sd_cmd_illegal(SDState *sd, SDRequest req) { qemu_log_mask(LOG_GUEST_ERROR, "%s: Unknown CMD%i for spec %s\n", @@ -1017,6 +1060,7 @@ static sd_rsp_type_t sd_cmd_illegal(SDState *sd, SDRequest req) } /* Commands that are recognised but not yet implemented. */ +__attribute__((unused)) static sd_rsp_type_t sd_cmd_unimplemented(SDState *sd, SDRequest req) { qemu_log_mask(LOG_UNIMP, "%s: CMD%i not implemented\n", @@ -1025,6 +1069,7 @@ static sd_rsp_type_t sd_cmd_unimplemented(SDState *sd, SDRequest req) return sd_illegal; } +/* CMD0 */ static sd_rsp_type_t sd_cmd_GO_IDLE_STATE(SDState *sd, SDRequest req) { if (sd->state != sd_inactive_state) { @@ -1035,24 +1080,27 @@ static sd_rsp_type_t sd_cmd_GO_IDLE_STATE(SDState *sd, SDRequest req) return sd_is_spi(sd) ? sd_r1 : sd_r0; } -static sd_rsp_type_t sd_cmd_SEND_OP_CMD(SDState *sd, SDRequest req) +/* CMD1 */ +static sd_rsp_type_t spi_cmd_SEND_OP_COND(SDState *sd, SDRequest req) { sd->state = sd_transfer_state; return sd_r1; } +/* CMD2 */ static sd_rsp_type_t sd_cmd_ALL_SEND_CID(SDState *sd, SDRequest req) { - if (sd->state != sd_ready_state) { + switch (sd->state) { + case sd_ready_state: + sd->state = sd_identification_state; + return sd_r2_i; + default: return sd_invalid_state_for_cmd(sd, req); } - - sd->state = sd_identification_state; - - return sd_r2_i; } +/* CMD3 */ static sd_rsp_type_t sd_cmd_SEND_RELATIVE_ADDR(SDState *sd, SDRequest req) { switch (sd->state) { @@ -1067,41 +1115,44 @@ static sd_rsp_type_t sd_cmd_SEND_RELATIVE_ADDR(SDState *sd, SDRequest req) } } +/* CMD19 */ static sd_rsp_type_t sd_cmd_SEND_TUNING_BLOCK(SDState *sd, SDRequest req) { - if (sd->spec_version < SD_PHY_SPECv3_01_VERS) { - return sd_cmd_illegal(sd, req); - } + if (sd->spec_version < SD_PHY_SPECv3_01_VERS) { + return sd_cmd_illegal(sd, req); + } - if (sd->state != sd_transfer_state) { - return sd_invalid_state_for_cmd(sd, req); - } + if (sd->state != sd_transfer_state) { + return sd_invalid_state_for_cmd(sd, req); + } - sd->state = sd_sendingdata_state; - sd->data_offset = 0; + sd->state = sd_sendingdata_state; + sd->data_offset = 0; - return sd_r1; + return sd_r1; } +/* CMD23 */ static sd_rsp_type_t sd_cmd_SET_BLOCK_COUNT(SDState *sd, SDRequest req) { - if (sd->spec_version < SD_PHY_SPECv3_01_VERS) { - return sd_cmd_illegal(sd, req); - } + if (sd->spec_version < SD_PHY_SPECv3_01_VERS) { + return sd_cmd_illegal(sd, req); + } - if (sd->state != sd_transfer_state) { - return sd_invalid_state_for_cmd(sd, req); - } + if (sd->state != sd_transfer_state) { + return sd_invalid_state_for_cmd(sd, req); + } - sd->multi_blk_cnt = req.arg; + sd->multi_blk_cnt = req.arg; + trace_sdcard_set_block_count(sd->multi_blk_cnt); - return sd_r1; + return sd_r1; } static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req) { - uint32_t rca = 0x0000; - uint64_t addr = (sd->ocr & (1 << 30)) ? (uint64_t) req.arg << 9 : req.arg; + uint16_t rca; + uint64_t addr; /* CMD55 precedes an ACMD, so we are not interested in tracing it. * However there is no ACMD55, so we want to trace this particular case. @@ -1115,11 +1166,6 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req) /* Not interpreting this as an app command */ sd->card_status &= ~APP_CMD; - if (sd_cmd_type[req.cmd] == sd_ac - || sd_cmd_type[req.cmd] == sd_adtc) { - rca = req.arg >> 16; - } - /* CMD23 (set block count) must be immediately followed by CMD18 or CMD25 * if not, its effects are cancelled */ if (sd->multi_blk_cnt != 0 && !(req.cmd == 18 || req.cmd == 25)) { @@ -1148,20 +1194,17 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req) break; case 6: /* CMD6: SWITCH_FUNCTION */ - switch (sd->mode) { - case sd_data_transfer_mode: - sd_function_switch(sd, req.arg); - sd->state = sd_sendingdata_state; - sd->data_start = 0; - sd->data_offset = 0; - return sd_r1; - - default: - break; + if (sd->mode != sd_data_transfer_mode) { + return sd_invalid_mode_for_cmd(sd, req); } - break; + sd_function_switch(sd, req.arg); + sd->state = sd_sendingdata_state; + sd->data_start = 0; + sd->data_offset = 0; + return sd_r1; case 7: /* CMD7: SELECT/DESELECT_CARD */ + rca = sd_req_get_rca(sd, req); switch (sd->state) { case sd_standby_state: if (sd->rca != rca) @@ -1216,6 +1259,7 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req) return sd_r7; case 9: /* CMD9: SEND_CSD */ + rca = sd_req_get_rca(sd, req); switch (sd->state) { case sd_standby_state: if (sd->rca != rca) @@ -1229,7 +1273,7 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req) } sd->state = sd_sendingdata_state; memcpy(sd->data, sd->csd, 16); - sd->data_start = addr; + sd->data_start = sd_req_get_address(sd, req); sd->data_offset = 0; return sd_r1; @@ -1239,6 +1283,7 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req) break; case 10: /* CMD10: SEND_CID */ + rca = sd_req_get_rca(sd, req); switch (sd->state) { case sd_standby_state: if (sd->rca != rca) @@ -1252,7 +1297,7 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req) } sd->state = sd_sendingdata_state; memcpy(sd->data, sd->cid, 16); - sd->data_start = addr; + sd->data_start = sd_req_get_address(sd, req); sd->data_offset = 0; return sd_r1; @@ -1279,32 +1324,25 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req) break; case 13: /* CMD13: SEND_STATUS */ - switch (sd->mode) { - case sd_data_transfer_mode: - if (!sd_is_spi(sd) && sd->rca != rca) { - return sd_r0; - } - - return sd_r1; - - default: - break; + rca = sd_req_get_rca(sd, req); + if (sd->mode != sd_data_transfer_mode) { + return sd_invalid_mode_for_cmd(sd, req); + } + if (!sd_is_spi(sd) && sd->rca != rca) { + return sd_r0; } - break; - case 15: /* CMD15: GO_INACTIVE_STATE */ - switch (sd->mode) { - case sd_data_transfer_mode: - if (sd->rca != rca) - return sd_r0; + return sd_r1; + case 15: /* CMD15: GO_INACTIVE_STATE */ + if (sd->mode != sd_data_transfer_mode) { + return sd_invalid_mode_for_cmd(sd, req); + } + rca = sd_req_get_rca(sd, req); + if (sd->rca == rca) { sd->state = sd_inactive_state; - return sd_r0; - - default: - break; } - break; + return sd_r0; /* Block read commands (Class 2) */ case 16: /* CMD16: SET_BLOCKLEN */ @@ -1326,6 +1364,7 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req) case 17: /* CMD17: READ_SINGLE_BLOCK */ case 18: /* CMD18: READ_MULTIPLE_BLOCK */ + addr = sd_req_get_address(sd, req); switch (sd->state) { case sd_transfer_state: @@ -1346,6 +1385,7 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req) /* Block write commands (Class 4) */ case 24: /* CMD24: WRITE_SINGLE_BLOCK */ case 25: /* CMD25: WRITE_MULTIPLE_BLOCK */ + addr = sd_req_get_address(sd, req); switch (sd->state) { case sd_transfer_state: @@ -1404,7 +1444,7 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req) if (sd->size > SDSC_MAX_CAPACITY) { return sd_illegal; } - + addr = sd_req_get_address(sd, req); switch (sd->state) { case sd_transfer_state: if (!address_in_range(sd, "SET_WRITE_PROT", addr, 1)) { @@ -1426,7 +1466,7 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req) if (sd->size > SDSC_MAX_CAPACITY) { return sd_illegal; } - + addr = sd_req_get_address(sd, req); switch (sd->state) { case sd_transfer_state: if (!address_in_range(sd, "CLR_WRITE_PROT", addr, 1)) { @@ -1448,7 +1488,7 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req) if (sd->size > SDSC_MAX_CAPACITY) { return sd_illegal; } - + addr = sd_req_get_address(sd, req); switch (sd->state) { case sd_transfer_state: if (!address_in_range(sd, "SEND_WRITE_PROT", @@ -1525,6 +1565,7 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req) /* Application specific commands (Class 8) */ case 55: /* CMD55: APP_CMD */ + rca = sd_req_get_rca(sd, req); switch (sd->state) { case sd_ready_state: case sd_identification_state: @@ -1720,7 +1761,7 @@ static sd_rsp_type_t sd_app_command(SDState *sd, return sd_illegal; } -static int cmd_valid_while_locked(SDState *sd, const uint8_t cmd) +static bool cmd_valid_while_locked(SDState *sd, unsigned cmd) { /* Valid commands in locked state: * basic class (0) @@ -1734,7 +1775,7 @@ static int cmd_valid_while_locked(SDState *sd, const uint8_t cmd) return cmd == 41 || cmd == 42; } if (cmd == 16 || cmd == 55) { - return 1; + return true; } return sd_cmd_class[cmd] == 0 || sd_cmd_class[cmd] == 7; } @@ -1788,8 +1829,8 @@ int sd_do_command(SDState *sd, SDRequest *req, * (Do this now so they appear in r1 responses.) */ sd->current_cmd = req->cmd; - sd->card_status &= ~CURRENT_STATE; - sd->card_status |= (last_state << 9); + sd->card_status = FIELD_DP32(sd->card_status, CSR, + CURRENT_STATE, last_state); } send_response: @@ -1826,6 +1867,13 @@ send_response: break; case sd_r0: + /* + * Invalid state transition, reset implementation + * fields to avoid OOB abuse. + */ + sd->data_start = 0; + sd->data_offset = 0; + /* fall-through */ case sd_illegal: rsplen = 0; break; @@ -1873,7 +1921,7 @@ void sd_write_byte(SDState *sd, uint8_t value) if (sd->data_offset >= sd->blk_len) { /* TODO: Check CRC before committing */ sd->state = sd_programming_state; - BLK_WRITE_BLOCK(sd->data_start, sd->data_offset); + sd_blk_write(sd, sd->data_start, sd->data_offset); sd->blk_written ++; sd->csd[14] |= 0x40; /* Bzzzzzzztt .... Operation complete. */ @@ -1899,7 +1947,7 @@ void sd_write_byte(SDState *sd, uint8_t value) if (sd->data_offset >= sd->blk_len) { /* TODO: Check CRC before committing */ sd->state = sd_programming_state; - BLK_WRITE_BLOCK(sd->data_start, sd->data_offset); + sd_blk_write(sd, sd->data_start, sd->data_offset); sd->blk_written++; sd->data_start += sd->blk_len; sd->data_offset = 0; @@ -2047,8 +2095,9 @@ uint8_t sd_read_byte(SDState *sd) break; case 17: /* CMD17: READ_SINGLE_BLOCK */ - if (sd->data_offset == 0) - BLK_READ_BLOCK(sd->data_start, io_len); + if (sd->data_offset == 0) { + sd_blk_read(sd, sd->data_start, io_len); + } ret = sd->data[sd->data_offset ++]; if (sd->data_offset >= io_len) @@ -2061,7 +2110,7 @@ uint8_t sd_read_byte(SDState *sd) sd->data_start, io_len)) { return 0x00; } - BLK_READ_BLOCK(sd->data_start, io_len); + sd_blk_read(sd, sd->data_start, io_len); } ret = sd->data[sd->data_offset ++]; @@ -2143,17 +2192,10 @@ static const SDProto sd_proto_spi = { .name = "SPI", .cmd = { [0] = sd_cmd_GO_IDLE_STATE, - [1] = sd_cmd_SEND_OP_CMD, - [2 ... 4] = sd_cmd_illegal, - [5] = sd_cmd_illegal, - [7] = sd_cmd_illegal, - [15] = sd_cmd_illegal, - [26] = sd_cmd_illegal, - [52 ... 54] = sd_cmd_illegal, + [1] = spi_cmd_SEND_OP_COND, }, .acmd = { - [6] = sd_cmd_unimplemented, - [41] = sd_cmd_SEND_OP_CMD, + [41] = spi_cmd_SEND_OP_COND, }, }; @@ -2161,15 +2203,10 @@ static const SDProto sd_proto_sd = { .name = "SD", .cmd = { [0] = sd_cmd_GO_IDLE_STATE, - [1] = sd_cmd_illegal, [2] = sd_cmd_ALL_SEND_CID, [3] = sd_cmd_SEND_RELATIVE_ADDR, - [5] = sd_cmd_illegal, [19] = sd_cmd_SEND_TUNING_BLOCK, [23] = sd_cmd_SET_BLOCK_COUNT, - [52 ... 54] = sd_cmd_illegal, - [58] = sd_cmd_illegal, - [59] = sd_cmd_illegal, }, }; diff --git a/hw/sd/sdmmc-internal.c b/hw/sd/sdmmc-internal.c index 8648a78..c1d5508 100644 --- a/hw/sd/sdmmc-internal.c +++ b/hw/sd/sdmmc-internal.c @@ -14,7 +14,7 @@ const char *sd_cmd_name(uint8_t cmd) { static const char *cmd_abbrev[SDMMC_CMD_MAX] = { - [0] = "GO_IDLE_STATE", [1] = "SEND_OP_CMD", + [0] = "GO_IDLE_STATE", [1] = "SEND_OP_COND", [2] = "ALL_SEND_CID", [3] = "SEND_RELATIVE_ADDR", [4] = "SET_DSR", [5] = "IO_SEND_OP_COND", [6] = "SWITCH_FUNC", [7] = "SELECT/DESELECT_CARD", diff --git a/hw/sd/trace-events b/hw/sd/trace-events index 94a0055..724365e 100644 --- a/hw/sd/trace-events +++ b/hw/sd/trace-events @@ -43,7 +43,8 @@ sdcard_response(const char *rspdesc, int rsplen) "%s (sz:%d)" sdcard_powerup(void) "" sdcard_inquiry_cmd41(void) "" sdcard_reset(void) "" -sdcard_set_blocklen(uint16_t length) "0x%03x" +sdcard_set_blocklen(uint16_t length) "block len 0x%03x" +sdcard_set_block_count(uint32_t cnt) "block cnt 0x%"PRIx32 sdcard_inserted(bool readonly) "read_only: %u" sdcard_ejected(void) "" sdcard_erase(uint32_t first, uint32_t last) "addr first 0x%" PRIx32" last 0x%" PRIx32 diff --git a/hw/vfio/common.c b/hw/vfio/common.c index f9619a1..7cdb969 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -630,16 +630,6 @@ static void vfio_listener_region_add(MemoryListener *listener, goto fail; } - if (bcontainer->iova_ranges) { - ret = memory_region_iommu_set_iova_ranges(giommu->iommu_mr, - bcontainer->iova_ranges, - &err); - if (ret) { - g_free(giommu); - goto fail; - } - } - ret = memory_region_register_iommu_notifier(section->mr, &giommu->n, &err); if (ret) { @@ -849,20 +839,11 @@ static bool vfio_section_is_vfio_pci(MemoryRegionSection *section, return false; } -static void vfio_dirty_tracking_update(MemoryListener *listener, - MemoryRegionSection *section) +static void vfio_dirty_tracking_update_range(VFIODirtyRanges *range, + hwaddr iova, hwaddr end, + bool update_pci) { - VFIODirtyRangesListener *dirty = container_of(listener, - VFIODirtyRangesListener, - listener); - VFIODirtyRanges *range = &dirty->ranges; - hwaddr iova, end, *min, *max; - - if (!vfio_listener_valid_section(section, "tracking_update") || - !vfio_get_section_iova_range(dirty->bcontainer, section, - &iova, &end, NULL)) { - return; - } + hwaddr *min, *max; /* * The address space passed to the dirty tracker is reduced to three ranges: @@ -883,8 +864,7 @@ static void vfio_dirty_tracking_update(MemoryListener *listener, * The alternative would be an IOVATree but that has a much bigger runtime * overhead and unnecessary complexity. */ - if (vfio_section_is_vfio_pci(section, dirty->bcontainer) && - iova >= UINT32_MAX) { + if (update_pci && iova >= UINT32_MAX) { min = &range->minpci64; max = &range->maxpci64; } else { @@ -899,7 +879,23 @@ static void vfio_dirty_tracking_update(MemoryListener *listener, } trace_vfio_device_dirty_tracking_update(iova, end, *min, *max); - return; +} + +static void vfio_dirty_tracking_update(MemoryListener *listener, + MemoryRegionSection *section) +{ + VFIODirtyRangesListener *dirty = + container_of(listener, VFIODirtyRangesListener, listener); + hwaddr iova, end; + + if (!vfio_listener_valid_section(section, "tracking_update") || + !vfio_get_section_iova_range(dirty->bcontainer, section, + &iova, &end, NULL)) { + return; + } + + vfio_dirty_tracking_update_range(&dirty->ranges, iova, end, + vfio_section_is_vfio_pci(section, dirty->bcontainer)); } static const MemoryListener vfio_dirty_tracking_listener = { @@ -1030,7 +1026,7 @@ static void vfio_device_feature_dma_logging_start_destroy( g_free(feature); } -static int vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer, +static bool vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer, Error **errp) { struct vfio_device_feature *feature; @@ -1043,7 +1039,7 @@ static int vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer, &ranges); if (!feature) { error_setg_errno(errp, errno, "Failed to prepare DMA logging"); - return -errno; + return false; } QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { @@ -1068,7 +1064,7 @@ out: vfio_device_feature_dma_logging_start_destroy(feature); - return ret; + return ret == 0; } static bool vfio_listener_log_global_start(MemoryListener *listener, @@ -1077,18 +1073,18 @@ static bool vfio_listener_log_global_start(MemoryListener *listener, ERRP_GUARD(); VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, listener); - int ret; + bool ret; if (vfio_devices_all_device_dirty_tracking(bcontainer)) { ret = vfio_devices_dma_logging_start(bcontainer, errp); } else { - ret = vfio_container_set_dirty_page_tracking(bcontainer, true, errp); + ret = vfio_container_set_dirty_page_tracking(bcontainer, true, errp) == 0; } - if (ret) { + if (!ret) { error_prepend(errp, "vfio: Could not start dirty page tracking - "); } - return !ret; + return ret; } static void vfio_listener_log_global_stop(MemoryListener *listener) @@ -1306,37 +1302,50 @@ vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer, &vrdl); } +static int vfio_sync_iommu_dirty_bitmap(VFIOContainerBase *bcontainer, + MemoryRegionSection *section) +{ + VFIOGuestIOMMU *giommu; + bool found = false; + Int128 llend; + vfio_giommu_dirty_notifier gdn; + int idx; + + QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) { + if (MEMORY_REGION(giommu->iommu_mr) == section->mr && + giommu->n.start == section->offset_within_region) { + found = true; + break; + } + } + + if (!found) { + return 0; + } + + gdn.giommu = giommu; + idx = memory_region_iommu_attrs_to_index(giommu->iommu_mr, + MEMTXATTRS_UNSPECIFIED); + + llend = int128_add(int128_make64(section->offset_within_region), + section->size); + llend = int128_sub(llend, int128_one()); + + iommu_notifier_init(&gdn.n, vfio_iommu_map_dirty_notify, IOMMU_NOTIFIER_MAP, + section->offset_within_region, int128_get64(llend), + idx); + memory_region_iommu_replay(giommu->iommu_mr, &gdn.n); + + return 0; +} + static int vfio_sync_dirty_bitmap(VFIOContainerBase *bcontainer, MemoryRegionSection *section, Error **errp) { ram_addr_t ram_addr; if (memory_region_is_iommu(section->mr)) { - VFIOGuestIOMMU *giommu; - - QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) { - if (MEMORY_REGION(giommu->iommu_mr) == section->mr && - giommu->n.start == section->offset_within_region) { - Int128 llend; - vfio_giommu_dirty_notifier gdn = { .giommu = giommu }; - int idx = memory_region_iommu_attrs_to_index(giommu->iommu_mr, - MEMTXATTRS_UNSPECIFIED); - - llend = int128_add(int128_make64(section->offset_within_region), - section->size); - llend = int128_sub(llend, int128_one()); - - iommu_notifier_init(&gdn.n, - vfio_iommu_map_dirty_notify, - IOMMU_NOTIFIER_MAP, - section->offset_within_region, - int128_get64(llend), - idx); - memory_region_iommu_replay(giommu->iommu_mr, &gdn.n); - break; - } - } - return 0; + return vfio_sync_iommu_dirty_bitmap(bcontainer, section); } else if (memory_region_has_ram_discard_manager(section->mr)) { int ret; @@ -1499,6 +1508,13 @@ void vfio_put_address_space(VFIOAddressSpace *space) } } +void vfio_address_space_insert(VFIOAddressSpace *space, + VFIOContainerBase *bcontainer) +{ + QLIST_INSERT_HEAD(&space->containers, bcontainer, next); + bcontainer->space = space; +} + struct vfio_device_info *vfio_get_device_info(int fd) { struct vfio_device_info *info; @@ -1528,6 +1544,7 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev, { const VFIOIOMMUClass *ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY)); + HostIOMMUDevice *hiod; if (vbasedev->iommufd) { ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); @@ -1535,7 +1552,19 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev, assert(ops); - return ops->attach_device(name, vbasedev, as, errp); + if (!ops->attach_device(name, vbasedev, as, errp)) { + return false; + } + + hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename)); + if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp)) { + object_unref(hiod); + ops->detach_device(vbasedev); + return false; + } + vbasedev->hiod = hiod; + + return true; } void vfio_detach_device(VFIODevice *vbasedev) @@ -1543,5 +1572,6 @@ void vfio_detach_device(VFIODevice *vbasedev) if (!vbasedev->bcontainer) { return; } - vbasedev->bcontainer->ops->detach_device(vbasedev); + object_unref(vbasedev->hiod); + VFIO_IOMMU_GET_CLASS(vbasedev->bcontainer)->detach_device(vbasedev); } diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c index 760d9d0..50b1664 100644 --- a/hw/vfio/container-base.c +++ b/hw/vfio/container-base.c @@ -19,73 +19,73 @@ int vfio_container_dma_map(VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, void *vaddr, bool readonly) { - g_assert(bcontainer->ops->dma_map); - return bcontainer->ops->dma_map(bcontainer, iova, size, vaddr, readonly); + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + + g_assert(vioc->dma_map); + return vioc->dma_map(bcontainer, iova, size, vaddr, readonly); } int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, IOMMUTLBEntry *iotlb) { - g_assert(bcontainer->ops->dma_unmap); - return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb); + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + + g_assert(vioc->dma_unmap); + return vioc->dma_unmap(bcontainer, iova, size, iotlb); } bool vfio_container_add_section_window(VFIOContainerBase *bcontainer, MemoryRegionSection *section, Error **errp) { - if (!bcontainer->ops->add_window) { + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + + if (!vioc->add_window) { return true; } - return bcontainer->ops->add_window(bcontainer, section, errp); + return vioc->add_window(bcontainer, section, errp); } void vfio_container_del_section_window(VFIOContainerBase *bcontainer, MemoryRegionSection *section) { - if (!bcontainer->ops->del_window) { + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + + if (!vioc->del_window) { return; } - return bcontainer->ops->del_window(bcontainer, section); + return vioc->del_window(bcontainer, section); } int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, bool start, Error **errp) { + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + if (!bcontainer->dirty_pages_supported) { return 0; } - g_assert(bcontainer->ops->set_dirty_page_tracking); - return bcontainer->ops->set_dirty_page_tracking(bcontainer, start, errp); + g_assert(vioc->set_dirty_page_tracking); + return vioc->set_dirty_page_tracking(bcontainer, start, errp); } int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp) { - g_assert(bcontainer->ops->query_dirty_bitmap); - return bcontainer->ops->query_dirty_bitmap(bcontainer, vbmap, iova, size, - errp); -} + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); -void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, - const VFIOIOMMUClass *ops) -{ - bcontainer->ops = ops; - bcontainer->space = space; - bcontainer->error = NULL; - bcontainer->dirty_pages_supported = false; - bcontainer->dma_max_mappings = 0; - bcontainer->iova_ranges = NULL; - QLIST_INIT(&bcontainer->giommu_list); - QLIST_INIT(&bcontainer->vrdl_list); + g_assert(vioc->query_dirty_bitmap); + return vioc->query_dirty_bitmap(bcontainer, vbmap, iova, size, + errp); } -void vfio_container_destroy(VFIOContainerBase *bcontainer) +static void vfio_container_instance_finalize(Object *obj) { + VFIOContainerBase *bcontainer = VFIO_IOMMU(obj); VFIOGuestIOMMU *giommu, *tmp; QLIST_REMOVE(bcontainer, next); @@ -100,11 +100,27 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer) g_list_free_full(bcontainer->iova_ranges, g_free); } +static void vfio_container_instance_init(Object *obj) +{ + VFIOContainerBase *bcontainer = VFIO_IOMMU(obj); + + bcontainer->error = NULL; + bcontainer->dirty_pages_supported = false; + bcontainer->dma_max_mappings = 0; + bcontainer->iova_ranges = NULL; + QLIST_INIT(&bcontainer->giommu_list); + QLIST_INIT(&bcontainer->vrdl_list); +} + static const TypeInfo types[] = { { .name = TYPE_VFIO_IOMMU, - .parent = TYPE_INTERFACE, + .parent = TYPE_OBJECT, + .instance_init = vfio_container_instance_init, + .instance_finalize = vfio_container_instance_finalize, + .instance_size = sizeof(VFIOContainerBase), .class_size = sizeof(VFIOIOMMUClass), + .abstract = true, }, }; diff --git a/hw/vfio/container.c b/hw/vfio/container.c index 096cc97..2e7ecdf 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -354,7 +354,7 @@ static void vfio_kvm_device_del_group(VFIOGroup *group) /* * vfio_get_iommu_type - selects the richest iommu_type (v2 first) */ -static int vfio_get_iommu_type(VFIOContainer *container, +static int vfio_get_iommu_type(int container_fd, Error **errp) { int iommu_types[] = { VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU, @@ -362,7 +362,7 @@ static int vfio_get_iommu_type(VFIOContainer *container, int i; for (i = 0; i < ARRAY_SIZE(iommu_types); i++) { - if (ioctl(container->fd, VFIO_CHECK_EXTENSION, iommu_types[i])) { + if (ioctl(container_fd, VFIO_CHECK_EXTENSION, iommu_types[i])) { return iommu_types[i]; } } @@ -373,67 +373,70 @@ static int vfio_get_iommu_type(VFIOContainer *container, /* * vfio_get_iommu_ops - get a VFIOIOMMUClass associated with a type */ -static const VFIOIOMMUClass *vfio_get_iommu_class(int iommu_type, Error **errp) +static const char *vfio_get_iommu_class_name(int iommu_type) { - ObjectClass *klass = NULL; - switch (iommu_type) { case VFIO_TYPE1v2_IOMMU: case VFIO_TYPE1_IOMMU: - klass = object_class_by_name(TYPE_VFIO_IOMMU_LEGACY); + return TYPE_VFIO_IOMMU_LEGACY; break; case VFIO_SPAPR_TCE_v2_IOMMU: case VFIO_SPAPR_TCE_IOMMU: - klass = object_class_by_name(TYPE_VFIO_IOMMU_SPAPR); + return TYPE_VFIO_IOMMU_SPAPR; break; default: g_assert_not_reached(); }; - - return VFIO_IOMMU_CLASS(klass); } -static bool vfio_set_iommu(VFIOContainer *container, int group_fd, - VFIOAddressSpace *space, Error **errp) +static bool vfio_set_iommu(int container_fd, int group_fd, + int *iommu_type, Error **errp) { - int iommu_type; - const VFIOIOMMUClass *vioc; - - iommu_type = vfio_get_iommu_type(container, errp); - if (iommu_type < 0) { - return false; - } - - if (ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { + if (ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &container_fd)) { error_setg_errno(errp, errno, "Failed to set group container"); return false; } - while (ioctl(container->fd, VFIO_SET_IOMMU, iommu_type)) { - if (iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { + while (ioctl(container_fd, VFIO_SET_IOMMU, *iommu_type)) { + if (*iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { /* * On sPAPR, despite the IOMMU subdriver always advertises v1 and * v2, the running platform may not support v2 and there is no * way to guess it until an IOMMU group gets added to the container. * So in case it fails with v2, try v1 as a fallback. */ - iommu_type = VFIO_SPAPR_TCE_IOMMU; + *iommu_type = VFIO_SPAPR_TCE_IOMMU; continue; } error_setg_errno(errp, errno, "Failed to set iommu for container"); return false; } - container->iommu_type = iommu_type; + return true; +} - vioc = vfio_get_iommu_class(iommu_type, errp); - if (!vioc) { - error_setg(errp, "No available IOMMU models"); - return false; +static VFIOContainer *vfio_create_container(int fd, VFIOGroup *group, + Error **errp) +{ + int iommu_type; + const char *vioc_name; + VFIOContainer *container; + + iommu_type = vfio_get_iommu_type(fd, errp); + if (iommu_type < 0) { + return NULL; } - vfio_container_init(&container->bcontainer, space, vioc); - return true; + if (!vfio_set_iommu(fd, group->fd, &iommu_type, errp)) { + return NULL; + } + + vioc_name = vfio_get_iommu_class_name(iommu_type); + + container = VFIO_IOMMU_LEGACY(object_new(vioc_name)); + container->fd = fd; + container->iommu_type = iommu_type; + return container; } static int vfio_get_iommu_info(VFIOContainer *container, @@ -542,6 +545,7 @@ static bool vfio_connect_container(VFIOGroup *group, AddressSpace *as, VFIOContainerBase *bcontainer; int ret, fd; VFIOAddressSpace *space; + VFIOIOMMUClass *vioc; space = vfio_get_address_space(as); @@ -610,13 +614,11 @@ static bool vfio_connect_container(VFIOGroup *group, AddressSpace *as, goto close_fd_exit; } - container = g_malloc0(sizeof(*container)); - container->fd = fd; - bcontainer = &container->bcontainer; - - if (!vfio_set_iommu(container, group->fd, space, errp)) { - goto free_container_exit; + container = vfio_create_container(fd, group, errp); + if (!container) { + goto close_fd_exit; } + bcontainer = &container->bcontainer; if (!vfio_cpr_register_container(bcontainer, errp)) { goto free_container_exit; @@ -628,16 +630,16 @@ static bool vfio_connect_container(VFIOGroup *group, AddressSpace *as, goto unregister_container_exit; } - assert(bcontainer->ops->setup); + vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + assert(vioc->setup); - if (!bcontainer->ops->setup(bcontainer, errp)) { + if (!vioc->setup(bcontainer, errp)) { goto enable_discards_exit; } vfio_kvm_device_add_group(group); - QLIST_INIT(&container->group_list); - QLIST_INSERT_HEAD(&space->containers, bcontainer, next); + vfio_address_space_insert(space, bcontainer); group->container = container; QLIST_INSERT_HEAD(&container->group_list, group, container_next); @@ -659,8 +661,8 @@ listener_release_exit: QLIST_REMOVE(bcontainer, next); vfio_kvm_device_del_group(group); memory_listener_unregister(&bcontainer->listener); - if (bcontainer->ops->release) { - bcontainer->ops->release(bcontainer); + if (vioc->release) { + vioc->release(bcontainer); } enable_discards_exit: @@ -670,7 +672,7 @@ unregister_container_exit: vfio_cpr_unregister_container(bcontainer); free_container_exit: - g_free(container); + object_unref(container); close_fd_exit: close(fd); @@ -685,6 +687,7 @@ static void vfio_disconnect_container(VFIOGroup *group) { VFIOContainer *container = group->container; VFIOContainerBase *bcontainer = &container->bcontainer; + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); QLIST_REMOVE(group, container_next); group->container = NULL; @@ -696,8 +699,8 @@ static void vfio_disconnect_container(VFIOGroup *group) */ if (QLIST_EMPTY(&container->group_list)) { memory_listener_unregister(&bcontainer->listener); - if (bcontainer->ops->release) { - bcontainer->ops->release(bcontainer); + if (vioc->release) { + vioc->release(bcontainer); } } @@ -709,12 +712,10 @@ static void vfio_disconnect_container(VFIOGroup *group) if (QLIST_EMPTY(&container->group_list)) { VFIOAddressSpace *space = bcontainer->space; - vfio_container_destroy(bcontainer); - trace_vfio_disconnect_container(container->fd); vfio_cpr_unregister_container(bcontainer); close(container->fd); - g_free(container); + object_unref(container); vfio_put_address_space(space); } @@ -1126,6 +1127,8 @@ static void vfio_iommu_legacy_class_init(ObjectClass *klass, void *data) { VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); + vioc->hiod_typename = TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO; + vioc->setup = vfio_legacy_setup; vioc->dma_map = vfio_legacy_dma_map; vioc->dma_unmap = vfio_legacy_dma_unmap; @@ -1136,12 +1139,75 @@ static void vfio_iommu_legacy_class_init(ObjectClass *klass, void *data) vioc->pci_hot_reset = vfio_legacy_pci_hot_reset; }; +static bool hiod_legacy_vfio_realize(HostIOMMUDevice *hiod, void *opaque, + Error **errp) +{ + VFIODevice *vdev = opaque; + + hiod->name = g_strdup(vdev->name); + hiod->caps.aw_bits = vfio_device_get_aw_bits(vdev); + hiod->agent = opaque; + + return true; +} + +static int hiod_legacy_vfio_get_cap(HostIOMMUDevice *hiod, int cap, + Error **errp) +{ + HostIOMMUDeviceCaps *caps = &hiod->caps; + + switch (cap) { + case HOST_IOMMU_DEVICE_CAP_AW_BITS: + return caps->aw_bits; + default: + error_setg(errp, "%s: unsupported capability %x", hiod->name, cap); + return -EINVAL; + } +} + +static GList * +hiod_legacy_vfio_get_iova_ranges(HostIOMMUDevice *hiod, Error **errp) +{ + VFIODevice *vdev = hiod->agent; + GList *l = NULL; + + g_assert(vdev); + + if (vdev->bcontainer) { + l = g_list_copy(vdev->bcontainer->iova_ranges); + } + + return l; +} + +static void vfio_iommu_legacy_instance_init(Object *obj) +{ + VFIOContainer *container = VFIO_IOMMU_LEGACY(obj); + + QLIST_INIT(&container->group_list); +} + +static void hiod_legacy_vfio_class_init(ObjectClass *oc, void *data) +{ + HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc); + + hioc->realize = hiod_legacy_vfio_realize; + hioc->get_cap = hiod_legacy_vfio_get_cap; + hioc->get_iova_ranges = hiod_legacy_vfio_get_iova_ranges; +}; + static const TypeInfo types[] = { { .name = TYPE_VFIO_IOMMU_LEGACY, .parent = TYPE_VFIO_IOMMU, + .instance_init = vfio_iommu_legacy_instance_init, + .instance_size = sizeof(VFIOContainer), .class_init = vfio_iommu_legacy_class_init, - }, + }, { + .name = TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO, + .parent = TYPE_HOST_IOMMU_DEVICE, + .class_init = hiod_legacy_vfio_class_init, + } }; DEFINE_TYPES(types) diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c index 27ea26a..b14edd4 100644 --- a/hw/vfio/helpers.c +++ b/hw/vfio/helpers.c @@ -658,3 +658,20 @@ void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops, vbasedev->ram_block_discard_allowed = ram_discard; } + +int vfio_device_get_aw_bits(VFIODevice *vdev) +{ + /* + * iova_ranges is a sorted list. For old kernels that support + * VFIO but not support query of iova ranges, iova_ranges is NULL, + * in this case HOST_IOMMU_DEVICE_CAP_AW_BITS_MAX(64) is returned. + */ + GList *l = g_list_last(vdev->bcontainer->iova_ranges); + + if (l) { + Range *range = l->data; + return range_get_last_bit(range) + 1; + } + + return HOST_IOMMU_DEVICE_CAP_AW_BITS_MAX; +} diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index 554f9a6..c2f158e 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -237,9 +237,8 @@ static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container) return; } memory_listener_unregister(&bcontainer->listener); - vfio_container_destroy(bcontainer); iommufd_backend_free_id(container->be, container->ioas_id); - g_free(container); + object_unref(container); } static int iommufd_cdev_ram_block_discard_disable(bool state) @@ -324,7 +323,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, /* try to attach to an existing container in this space */ QLIST_FOREACH(bcontainer, &space->containers, next) { container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); - if (bcontainer->ops != iommufd_vioc || + if (VFIO_IOMMU_GET_CLASS(bcontainer) != iommufd_vioc || vbasedev->iommufd != container->be) { continue; } @@ -352,13 +351,12 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, trace_iommufd_cdev_alloc_ioas(vbasedev->iommufd->fd, ioas_id); - container = g_malloc0(sizeof(*container)); + container = VFIO_IOMMU_IOMMUFD(object_new(TYPE_VFIO_IOMMU_IOMMUFD)); container->be = vbasedev->iommufd; container->ioas_id = ioas_id; bcontainer = &container->bcontainer; - vfio_container_init(bcontainer, space, iommufd_vioc); - QLIST_INSERT_HEAD(&space->containers, bcontainer, next); + vfio_address_space_insert(space, bcontainer); if (!iommufd_cdev_attach_container(vbasedev, container, errp)) { goto err_attach_container; @@ -465,7 +463,7 @@ static VFIODevice *iommufd_cdev_pci_find_by_devid(__u32 devid) VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); QLIST_FOREACH(vbasedev_iter, &vfio_device_list, global_next) { - if (vbasedev_iter->bcontainer->ops != iommufd_vioc) { + if (VFIO_IOMMU_GET_CLASS(vbasedev_iter->bcontainer) != iommufd_vioc) { continue; } if (devid == vbasedev_iter->devid) { @@ -612,6 +610,8 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data) { VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); + vioc->hiod_typename = TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO; + vioc->dma_map = iommufd_cdev_map; vioc->dma_unmap = iommufd_cdev_unmap; vioc->attach_device = iommufd_cdev_attach; @@ -619,12 +619,64 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data) vioc->pci_hot_reset = iommufd_cdev_pci_hot_reset; }; +static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque, + Error **errp) +{ + VFIODevice *vdev = opaque; + HostIOMMUDeviceCaps *caps = &hiod->caps; + enum iommu_hw_info_type type; + union { + struct iommu_hw_info_vtd vtd; + } data; + + hiod->agent = opaque; + + if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid, + &type, &data, sizeof(data), errp)) { + return false; + } + + hiod->name = g_strdup(vdev->name); + caps->type = type; + caps->aw_bits = vfio_device_get_aw_bits(vdev); + + return true; +} + +static GList * +hiod_iommufd_vfio_get_iova_ranges(HostIOMMUDevice *hiod, Error **errp) +{ + VFIODevice *vdev = hiod->agent; + GList *l = NULL; + + g_assert(vdev); + + if (vdev->bcontainer) { + l = g_list_copy(vdev->bcontainer->iova_ranges); + } + + return l; +} + +static void hiod_iommufd_vfio_class_init(ObjectClass *oc, void *data) +{ + HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_CLASS(oc); + + hiodc->realize = hiod_iommufd_vfio_realize; + hiodc->get_iova_ranges = hiod_iommufd_vfio_get_iova_ranges; +}; + static const TypeInfo types[] = { { .name = TYPE_VFIO_IOMMU_IOMMUFD, .parent = TYPE_VFIO_IOMMU, + .instance_size = sizeof(VFIOIOMMUFDContainer), .class_init = vfio_iommu_iommufd_class_init, - }, + }, { + .name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO, + .parent = TYPE_HOST_IOMMU_DEVICE_IOMMUFD, + .class_init = hiod_iommufd_vfio_class_init, + } }; DEFINE_TYPES(types) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 74a79bd..e03d9f3 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -2511,9 +2511,9 @@ int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) { VFIODevice *vbasedev = &vdev->vbasedev; - const VFIOIOMMUClass *ops = vbasedev->bcontainer->ops; + const VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(vbasedev->bcontainer); - return ops->pci_hot_reset(vbasedev, single); + return vioc->pci_hot_reset(vbasedev, single); } /* @@ -3121,10 +3121,15 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) vfio_bars_register(vdev); - if (!vfio_add_capabilities(vdev, errp)) { + if (!pci_device_set_iommu_device(pdev, vbasedev->hiod, errp)) { + error_prepend(errp, "Failed to set iommu_device: "); goto out_teardown; } + if (!vfio_add_capabilities(vdev, errp)) { + goto out_unset_idev; + } + if (vdev->vga) { vfio_vga_quirk_setup(vdev); } @@ -3141,7 +3146,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) error_setg(errp, "cannot support IGD OpRegion feature on hotplugged " "device"); - goto out_teardown; + goto out_unset_idev; } ret = vfio_get_dev_region_info(vbasedev, @@ -3150,11 +3155,11 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) if (ret) { error_setg_errno(errp, -ret, "does not support requested IGD OpRegion feature"); - goto out_teardown; + goto out_unset_idev; } if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) { - goto out_teardown; + goto out_unset_idev; } } @@ -3238,6 +3243,8 @@ out_deregister: if (vdev->intx.mmap_timer) { timer_free(vdev->intx.mmap_timer); } +out_unset_idev: + pci_device_unset_iommu_device(pdev); out_teardown: vfio_teardown_msi(vdev); vfio_bars_exit(vdev); @@ -3266,6 +3273,7 @@ static void vfio_instance_finalize(Object *obj) static void vfio_exitfn(PCIDevice *pdev) { VFIOPCIDevice *vdev = VFIO_PCI(pdev); + VFIODevice *vbasedev = &vdev->vbasedev; vfio_unregister_req_notifier(vdev); vfio_unregister_err_notifier(vdev); @@ -3280,7 +3288,8 @@ static void vfio_exitfn(PCIDevice *pdev) vfio_teardown_msi(vdev); vfio_pci_disable_rp_atomics(vdev); vfio_bars_exit(vdev); - vfio_migration_exit(&vdev->vbasedev); + vfio_migration_exit(vbasedev); + pci_device_unset_iommu_device(pdev); } static void vfio_pci_reset(DeviceState *dev) diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c index 47b040f..018bd20 100644 --- a/hw/vfio/spapr.c +++ b/hw/vfio/spapr.c @@ -30,6 +30,8 @@ typedef struct VFIOSpaprContainer { QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; } VFIOSpaprContainer; +OBJECT_DECLARE_SIMPLE_TYPE(VFIOSpaprContainer, VFIO_IOMMU_SPAPR); + static bool vfio_prereg_listener_skipped_section(MemoryRegionSection *section) { if (memory_region_is_iommu(section->mr)) { @@ -548,6 +550,7 @@ static const TypeInfo types[] = { { .name = TYPE_VFIO_IOMMU_SPAPR, .parent = TYPE_VFIO_IOMMU_LEGACY, + .instance_size = sizeof(VFIOSpaprContainer), .class_init = vfio_iommu_spapr_class_init, }, }; diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index 1326c6e..b9a7ddc 100644 --- a/hw/virtio/virtio-iommu.c +++ b/hw/virtio/virtio-iommu.c @@ -69,6 +69,11 @@ typedef struct VirtIOIOMMUMapping { uint32_t flags; } VirtIOIOMMUMapping; +struct hiod_key { + PCIBus *bus; + uint8_t devfn; +}; + static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev) { return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn); @@ -462,8 +467,195 @@ static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque, return &sdev->as; } +static gboolean hiod_equal(gconstpointer v1, gconstpointer v2) +{ + const struct hiod_key *key1 = v1; + const struct hiod_key *key2 = v2; + + return (key1->bus == key2->bus) && (key1->devfn == key2->devfn); +} + +static guint hiod_hash(gconstpointer v) +{ + const struct hiod_key *key = v; + guint value = (guint)(uintptr_t)key->bus; + + return (guint)(value << 8 | key->devfn); +} + +static void hiod_destroy(gpointer v) +{ + object_unref(v); +} + +static HostIOMMUDevice * +get_host_iommu_device(VirtIOIOMMU *viommu, PCIBus *bus, int devfn) { + struct hiod_key key = { + .bus = bus, + .devfn = devfn, + }; + + return g_hash_table_lookup(viommu->host_iommu_devices, &key); +} + +/** + * rebuild_resv_regions: rebuild resv regions with both the + * info of host resv ranges and property set resv ranges + */ +static int rebuild_resv_regions(IOMMUDevice *sdev) +{ + GList *l; + int i = 0; + + /* free the existing list and rebuild it from scratch */ + g_list_free_full(sdev->resv_regions, g_free); + sdev->resv_regions = NULL; + + /* First add host reserved regions if any, all tagged as RESERVED */ + for (l = sdev->host_resv_ranges; l; l = l->next) { + ReservedRegion *reg = g_new0(ReservedRegion, 1); + Range *r = (Range *)l->data; + + reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED; + range_set_bounds(®->range, range_lob(r), range_upb(r)); + sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg); + trace_virtio_iommu_host_resv_regions(sdev->iommu_mr.parent_obj.name, i, + range_lob(®->range), + range_upb(®->range)); + i++; + } + /* + * then add higher priority reserved regions set by the machine + * through properties + */ + add_prop_resv_regions(sdev); + return 0; +} + +static int virtio_iommu_set_host_iova_ranges(VirtIOIOMMU *s, PCIBus *bus, + int devfn, GList *iova_ranges, + Error **errp) +{ + IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus); + IOMMUDevice *sdev; + GList *current_ranges; + GList *l, *tmp, *new_ranges = NULL; + int ret = -EINVAL; + + if (!sbus) { + error_report("%s no sbus", __func__); + } + + sdev = sbus->pbdev[devfn]; + + current_ranges = sdev->host_resv_ranges; + + g_assert(!sdev->probe_done); + + /* check that each new resv region is included in an existing one */ + if (sdev->host_resv_ranges) { + range_inverse_array(iova_ranges, + &new_ranges, + 0, UINT64_MAX); + + for (tmp = new_ranges; tmp; tmp = tmp->next) { + Range *newr = (Range *)tmp->data; + bool included = false; + + for (l = current_ranges; l; l = l->next) { + Range * r = (Range *)l->data; + + if (range_contains_range(r, newr)) { + included = true; + break; + } + } + if (!included) { + goto error; + } + } + /* all new reserved ranges are included in existing ones */ + ret = 0; + goto out; + } + + range_inverse_array(iova_ranges, + &sdev->host_resv_ranges, + 0, UINT64_MAX); + rebuild_resv_regions(sdev); + + return 0; +error: + error_setg(errp, "%s Conflicting host reserved ranges set!", + __func__); +out: + g_list_free_full(new_ranges, g_free); + return ret; +} + +static bool virtio_iommu_set_iommu_device(PCIBus *bus, void *opaque, int devfn, + HostIOMMUDevice *hiod, Error **errp) +{ + VirtIOIOMMU *viommu = opaque; + HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod); + struct hiod_key *new_key; + GList *host_iova_ranges = NULL; + + assert(hiod); + + if (get_host_iommu_device(viommu, bus, devfn)) { + error_setg(errp, "Host IOMMU device already exists"); + return false; + } + + if (hiodc->get_iova_ranges) { + int ret; + host_iova_ranges = hiodc->get_iova_ranges(hiod, errp); + if (!host_iova_ranges) { + return true; /* some old kernels may not support that capability */ + } + ret = virtio_iommu_set_host_iova_ranges(viommu, hiod->aliased_bus, + hiod->aliased_devfn, + host_iova_ranges, errp); + if (ret) { + g_list_free_full(host_iova_ranges, g_free); + return false; + } + } + + new_key = g_malloc(sizeof(*new_key)); + new_key->bus = bus; + new_key->devfn = devfn; + + object_ref(hiod); + g_hash_table_insert(viommu->host_iommu_devices, new_key, hiod); + g_list_free_full(host_iova_ranges, g_free); + + return true; +} + +static void +virtio_iommu_unset_iommu_device(PCIBus *bus, void *opaque, int devfn) +{ + VirtIOIOMMU *viommu = opaque; + HostIOMMUDevice *hiod; + struct hiod_key key = { + .bus = bus, + .devfn = devfn, + }; + + hiod = g_hash_table_lookup(viommu->host_iommu_devices, &key); + if (!hiod) { + return; + } + + g_hash_table_remove(viommu->host_iommu_devices, &key); +} + static const PCIIOMMUOps virtio_iommu_ops = { .get_address_space = virtio_iommu_find_add_as, + .set_iommu_device = virtio_iommu_set_iommu_device, + .unset_iommu_device = virtio_iommu_unset_iommu_device, }; static int virtio_iommu_attach(VirtIOIOMMU *s, @@ -1159,106 +1351,6 @@ static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, return 0; } -/** - * rebuild_resv_regions: rebuild resv regions with both the - * info of host resv ranges and property set resv ranges - */ -static int rebuild_resv_regions(IOMMUDevice *sdev) -{ - GList *l; - int i = 0; - - /* free the existing list and rebuild it from scratch */ - g_list_free_full(sdev->resv_regions, g_free); - sdev->resv_regions = NULL; - - /* First add host reserved regions if any, all tagged as RESERVED */ - for (l = sdev->host_resv_ranges; l; l = l->next) { - ReservedRegion *reg = g_new0(ReservedRegion, 1); - Range *r = (Range *)l->data; - - reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED; - range_set_bounds(®->range, range_lob(r), range_upb(r)); - sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg); - trace_virtio_iommu_host_resv_regions(sdev->iommu_mr.parent_obj.name, i, - range_lob(®->range), - range_upb(®->range)); - i++; - } - /* - * then add higher priority reserved regions set by the machine - * through properties - */ - add_prop_resv_regions(sdev); - return 0; -} - -/** - * virtio_iommu_set_iova_ranges: Conveys the usable IOVA ranges - * - * The function turns those into reserved ranges. Once some - * reserved ranges have been set, new reserved regions cannot be - * added outside of the original ones. - * - * @mr: IOMMU MR - * @iova_ranges: list of usable IOVA ranges - * @errp: error handle - */ -static int virtio_iommu_set_iova_ranges(IOMMUMemoryRegion *mr, - GList *iova_ranges, - Error **errp) -{ - IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr); - GList *current_ranges = sdev->host_resv_ranges; - GList *l, *tmp, *new_ranges = NULL; - int ret = -EINVAL; - - /* check that each new resv region is included in an existing one */ - if (sdev->host_resv_ranges) { - range_inverse_array(iova_ranges, - &new_ranges, - 0, UINT64_MAX); - - for (tmp = new_ranges; tmp; tmp = tmp->next) { - Range *newr = (Range *)tmp->data; - bool included = false; - - for (l = current_ranges; l; l = l->next) { - Range * r = (Range *)l->data; - - if (range_contains_range(r, newr)) { - included = true; - break; - } - } - if (!included) { - goto error; - } - } - /* all new reserved ranges are included in existing ones */ - ret = 0; - goto out; - } - - if (sdev->probe_done) { - warn_report("%s: Notified about new host reserved regions after probe", - mr->parent_obj.name); - } - - range_inverse_array(iova_ranges, - &sdev->host_resv_ranges, - 0, UINT64_MAX); - rebuild_resv_regions(sdev); - - return 0; -error: - error_setg(errp, "IOMMU mr=%s Conflicting host reserved ranges set!", - mr->parent_obj.name); -out: - g_list_free_full(new_ranges, g_free); - return ret; -} - static void virtio_iommu_system_reset(void *opaque) { VirtIOIOMMU *s = opaque; @@ -1357,6 +1449,9 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free); + s->host_iommu_devices = g_hash_table_new_full(hiod_hash, hiod_equal, + g_free, hiod_destroy); + if (s->primary_bus) { pci_setup_iommu(s->primary_bus, &virtio_iommu_ops, s); } else { @@ -1581,7 +1676,6 @@ static void virtio_iommu_memory_region_class_init(ObjectClass *klass, imrc->replay = virtio_iommu_replay; imrc->notify_flag_changed = virtio_iommu_notify_flag_changed; imrc->iommu_set_page_size_mask = virtio_iommu_set_page_size_mask; - imrc->iommu_set_iova_ranges = virtio_iommu_set_iova_ranges; } static const TypeInfo virtio_iommu_info = { diff --git a/include/exec/memory.h b/include/exec/memory.h index 2d7c278..0903513 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -530,26 +530,6 @@ struct IOMMUMemoryRegionClass { int (*iommu_set_page_size_mask)(IOMMUMemoryRegion *iommu, uint64_t page_size_mask, Error **errp); - /** - * @iommu_set_iova_ranges: - * - * Propagate information about the usable IOVA ranges for a given IOMMU - * memory region. Used for example to propagate host physical device - * reserved memory region constraints to the virtual IOMMU. - * - * Optional method: if this method is not provided, then the default IOVA - * aperture is used. - * - * @iommu: the IOMMUMemoryRegion - * - * @iova_ranges: list of ordered IOVA ranges (at least one range) - * - * Returns 0 on success, or a negative error. In case of failure, the error - * object must be created. - */ - int (*iommu_set_iova_ranges)(IOMMUMemoryRegion *iommu, - GList *iova_ranges, - Error **errp); }; typedef struct RamDiscardListener RamDiscardListener; @@ -1952,18 +1932,6 @@ int memory_region_iommu_set_page_size_mask(IOMMUMemoryRegion *iommu_mr, Error **errp); /** - * memory_region_iommu_set_iova_ranges - Set the usable IOVA ranges - * for a given IOMMU MR region - * - * @iommu: IOMMU memory region - * @iova_ranges: list of ordered IOVA ranges (at least one range) - * @errp: pointer to Error*, to store an error if it happens. - */ -int memory_region_iommu_set_iova_ranges(IOMMUMemoryRegion *iommu, - GList *iova_ranges, - Error **errp); - -/** * memory_region_name: get a memory region's name * * Returns the string that was used to initialize the memory region. diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index 7fa0a69..1eb05c2 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -292,6 +292,8 @@ struct IntelIOMMUState { /* list of registered notifiers */ QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers; + GHashTable *vtd_host_iommu_dev; /* HostIOMMUDevice */ + /* interrupt remapping */ bool intr_enabled; /* Whether guest enabled IR */ dma_addr_t intr_root; /* Interrupt remapping table pointer */ diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index eaa3fc9..eb26cac 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -3,6 +3,7 @@ #include "exec/memory.h" #include "sysemu/dma.h" +#include "sysemu/host_iommu_device.h" /* PCI includes legacy ISA access. */ #include "hw/isa/isa.h" @@ -383,10 +384,45 @@ typedef struct PCIIOMMUOps { * * @devfn: device and function number */ - AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn); + AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn); + /** + * @set_iommu_device: attach a HostIOMMUDevice to a vIOMMU + * + * Optional callback, if not implemented in vIOMMU, then vIOMMU can't + * retrieve host information from the associated HostIOMMUDevice. + * + * @bus: the #PCIBus of the PCI device. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @devfn: device and function number of the PCI device. + * + * @dev: the #HostIOMMUDevice to attach. + * + * @errp: pass an Error out only when return false + * + * Returns: true if HostIOMMUDevice is attached or else false with errp set. + */ + bool (*set_iommu_device)(PCIBus *bus, void *opaque, int devfn, + HostIOMMUDevice *dev, Error **errp); + /** + * @unset_iommu_device: detach a HostIOMMUDevice from a vIOMMU + * + * Optional callback. + * + * @bus: the #PCIBus of the PCI device. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @devfn: device and function number of the PCI device. + */ + void (*unset_iommu_device)(PCIBus *bus, void *opaque, int devfn); } PCIIOMMUOps; AddressSpace *pci_device_iommu_address_space(PCIDevice *dev); +bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod, + Error **errp); +void pci_device_unset_iommu_device(PCIDevice *dev); /** * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus diff --git a/include/hw/riscv/virt.h b/include/hw/riscv/virt.h index 3db8391..c0dc41f 100644 --- a/include/hw/riscv/virt.h +++ b/include/hw/riscv/virt.h @@ -118,6 +118,7 @@ enum { #define FDT_PLIC_ADDR_CELLS 0 #define FDT_PLIC_INT_CELLS 1 #define FDT_APLIC_INT_CELLS 2 +#define FDT_APLIC_ADDR_CELLS 0 #define FDT_IMSIC_INT_CELLS 0 #define FDT_MAX_INT_CELLS 2 #define FDT_MAX_INT_MAP_WIDTH (FDT_PCI_ADDR_CELLS + FDT_PCI_INT_CELLS + \ diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 4cb1ab8..e8ddf92 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -31,6 +31,8 @@ #endif #include "sysemu/sysemu.h" #include "hw/vfio/vfio-container-base.h" +#include "sysemu/host_iommu_device.h" +#include "sysemu/iommufd.h" #define VFIO_MSG_PREFIX "vfio %s: " @@ -82,6 +84,8 @@ typedef struct VFIOContainer { QLIST_HEAD(, VFIOGroup) group_list; } VFIOContainer; +OBJECT_DECLARE_SIMPLE_TYPE(VFIOContainer, VFIO_IOMMU_LEGACY); + typedef struct VFIOHostDMAWindow { hwaddr min_iova; hwaddr max_iova; @@ -97,6 +101,8 @@ typedef struct VFIOIOMMUFDContainer { uint32_t ioas_id; } VFIOIOMMUFDContainer; +OBJECT_DECLARE_SIMPLE_TYPE(VFIOIOMMUFDContainer, VFIO_IOMMU_IOMMUFD); + typedef struct VFIODeviceOps VFIODeviceOps; typedef struct VFIODevice { @@ -125,6 +131,7 @@ typedef struct VFIODevice { OnOffAuto pre_copy_dirty_page_tracking; bool dirty_pages_supported; bool dirty_tracking; + HostIOMMUDevice *hiod; int devid; IOMMUFDBackend *iommufd; } VFIODevice; @@ -171,6 +178,10 @@ typedef struct VFIOGroup { bool ram_block_discard_allowed; } VFIOGroup; +#define TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO TYPE_HOST_IOMMU_DEVICE "-legacy-vfio" +#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO \ + TYPE_HOST_IOMMU_DEVICE_IOMMUFD "-vfio" + typedef struct VFIODMABuf { QemuDmaBuf *buf; uint32_t pos_x, pos_y, pos_updates; @@ -199,10 +210,8 @@ typedef struct VFIODisplay { VFIOAddressSpace *vfio_get_address_space(AddressSpace *as); void vfio_put_address_space(VFIOAddressSpace *space); - -/* SPAPR specific */ -int vfio_spapr_container_init(VFIOContainer *container, Error **errp); -void vfio_spapr_container_deinit(VFIOContainer *container); +void vfio_address_space_insert(VFIOAddressSpace *space, + VFIOContainerBase *bcontainer); void vfio_disable_irqindex(VFIODevice *vbasedev, int index); void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index); @@ -283,4 +292,5 @@ bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp); void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp); void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops, DeviceState *dev, bool ram_discard); +int vfio_device_get_aw_bits(VFIODevice *vdev); #endif /* HW_VFIO_VFIO_COMMON_H */ diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h index 2776481..419e45e 100644 --- a/include/hw/vfio/vfio-container-base.h +++ b/include/hw/vfio/vfio-container-base.h @@ -34,7 +34,7 @@ typedef struct VFIOAddressSpace { * This is the base object for vfio container backends */ typedef struct VFIOContainerBase { - const VFIOIOMMUClass *ops; + Object parent; VFIOAddressSpace *space; MemoryListener listener; Error *error; @@ -86,28 +86,18 @@ int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp); -void vfio_container_init(VFIOContainerBase *bcontainer, - VFIOAddressSpace *space, - const VFIOIOMMUClass *ops); -void vfio_container_destroy(VFIOContainerBase *bcontainer); - - #define TYPE_VFIO_IOMMU "vfio-iommu" #define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy" #define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr" #define TYPE_VFIO_IOMMU_IOMMUFD TYPE_VFIO_IOMMU "-iommufd" -/* - * VFIOContainerBase is not an abstract QOM object because it felt - * unnecessary to expose all the IOMMU backends to the QEMU machine - * and human interface. However, we can still abstract the IOMMU - * backend handlers using a QOM interface class. This provides more - * flexibility when referencing the various implementations. - */ -DECLARE_CLASS_CHECKERS(VFIOIOMMUClass, VFIO_IOMMU, TYPE_VFIO_IOMMU) +OBJECT_DECLARE_TYPE(VFIOContainerBase, VFIOIOMMUClass, VFIO_IOMMU) struct VFIOIOMMUClass { - InterfaceClass parent_class; + ObjectClass parent_class; + + /* Properties */ + const char *hiod_typename; /* basic feature */ bool (*setup)(VFIOContainerBase *bcontainer, Error **errp); diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h index 83a52cc..bdb3da7 100644 --- a/include/hw/virtio/virtio-iommu.h +++ b/include/hw/virtio/virtio-iommu.h @@ -25,6 +25,7 @@ #include "hw/pci/pci.h" #include "qom/object.h" #include "qapi/qapi-types-virtio.h" +#include "sysemu/host_iommu_device.h" #define TYPE_VIRTIO_IOMMU "virtio-iommu-device" #define TYPE_VIRTIO_IOMMU_PCI "virtio-iommu-pci" @@ -57,6 +58,7 @@ struct VirtIOIOMMU { struct virtio_iommu_config config; uint64_t features; GHashTable *as_by_busptr; + GHashTable *host_iommu_devices; IOMMUPciBus *iommu_pcibus_by_bus_num[PCI_BUS_MAX]; PCIBus *primary_bus; ReservedRegion *prop_resv_regions; diff --git a/include/qemu/bswap.h b/include/qemu/bswap.h index bd67468..ad22910 100644 --- a/include/qemu/bswap.h +++ b/include/qemu/bswap.h @@ -38,12 +38,14 @@ static inline void bswap64s(uint64_t *s) #if HOST_BIG_ENDIAN #define be_bswap(v, size) (v) #define le_bswap(v, size) glue(__builtin_bswap, size)(v) +#define be_bswap24(v) (v) #define le_bswap24(v) bswap24(v) #define be_bswaps(v, size) #define le_bswaps(p, size) \ do { *p = glue(__builtin_bswap, size)(*p); } while (0) #else #define le_bswap(v, size) (v) +#define be_bswap24(v) bswap24(v) #define le_bswap24(v) (v) #define be_bswap(v, size) glue(__builtin_bswap, size)(v) #define le_bswaps(v, size) @@ -357,6 +359,11 @@ static inline void stw_be_p(void *ptr, uint16_t v) stw_he_p(ptr, be_bswap(v, 16)); } +static inline void st24_be_p(void *ptr, uint32_t v) +{ + st24_he_p(ptr, be_bswap24(v)); +} + static inline void stl_be_p(void *ptr, uint32_t v) { stl_he_p(ptr, be_bswap(v, 32)); diff --git a/include/qemu/range.h b/include/qemu/range.h index 205e1da..4ce694a 100644 --- a/include/qemu/range.h +++ b/include/qemu/range.h @@ -20,6 +20,8 @@ #ifndef QEMU_RANGE_H #define QEMU_RANGE_H +#include "qemu/bitops.h" + /* * Operations on 64 bit address ranges. * Notes: @@ -217,6 +219,15 @@ static inline int ranges_overlap(uint64_t first1, uint64_t len1, return !(last2 < first1 || last1 < first2); } +/* Get highest non-zero bit position of a range */ +static inline int range_get_last_bit(Range *range) +{ + if (range_is_empty(range)) { + return -1; + } + return 63 - clz64(range->upb); +} + /* * Return -1 if @a < @b, 1 @a > @b, and 0 if they touch or overlap. * Both @a and @b must not be empty. diff --git a/include/sysemu/host_iommu_device.h b/include/sysemu/host_iommu_device.h new file mode 100644 index 0000000..ee6c813 --- /dev/null +++ b/include/sysemu/host_iommu_device.h @@ -0,0 +1,102 @@ +/* + * Host IOMMU device abstract declaration + * + * Copyright (C) 2024 Intel Corporation. + * + * Authors: Zhenzhong Duan <zhenzhong.duan@intel.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#ifndef HOST_IOMMU_DEVICE_H +#define HOST_IOMMU_DEVICE_H + +#include "qom/object.h" +#include "qapi/error.h" + +/** + * struct HostIOMMUDeviceCaps - Define host IOMMU device capabilities. + * + * @type: host platform IOMMU type. + * + * @aw_bits: host IOMMU address width. 0xff if no limitation. + */ +typedef struct HostIOMMUDeviceCaps { + uint32_t type; + uint8_t aw_bits; +} HostIOMMUDeviceCaps; + +#define TYPE_HOST_IOMMU_DEVICE "host-iommu-device" +OBJECT_DECLARE_TYPE(HostIOMMUDevice, HostIOMMUDeviceClass, HOST_IOMMU_DEVICE) + +struct HostIOMMUDevice { + Object parent_obj; + + char *name; + void *agent; /* pointer to agent device, ie. VFIO or VDPA device */ + PCIBus *aliased_bus; + int aliased_devfn; + HostIOMMUDeviceCaps caps; +}; + +/** + * struct HostIOMMUDeviceClass - The base class for all host IOMMU devices. + * + * Different types of host devices (e.g., VFIO or VDPA device) or devices + * with different backend (e.g., VFIO legacy container or IOMMUFD backend) + * will have different implementations of the HostIOMMUDeviceClass. + */ +struct HostIOMMUDeviceClass { + ObjectClass parent_class; + + /** + * @realize: initialize host IOMMU device instance further. + * + * Mandatory callback. + * + * @hiod: pointer to a host IOMMU device instance. + * + * @opaque: pointer to agent device of this host IOMMU device, + * e.g., VFIO base device or VDPA device. + * + * @errp: pass an Error out when realize fails. + * + * Returns: true on success, false on failure. + */ + bool (*realize)(HostIOMMUDevice *hiod, void *opaque, Error **errp); + /** + * @get_cap: check if a host IOMMU device capability is supported. + * + * Optional callback, if not implemented, hint not supporting query + * of @cap. + * + * @hiod: pointer to a host IOMMU device instance. + * + * @cap: capability to check. + * + * @errp: pass an Error out when fails to query capability. + * + * Returns: <0 on failure, 0 if a @cap is unsupported, or else + * 1 or some positive value for some special @cap, + * i.e., HOST_IOMMU_DEVICE_CAP_AW_BITS. + */ + int (*get_cap)(HostIOMMUDevice *hiod, int cap, Error **errp); + /** + * @get_iova_ranges: Return the list of usable iova_ranges along with + * @hiod Host IOMMU device + * + * @hiod: handle to the host IOMMU device + * @errp: error handle + */ + GList* (*get_iova_ranges)(HostIOMMUDevice *hiod, Error **errp); +}; + +/* + * Host IOMMU device capability list. + */ +#define HOST_IOMMU_DEVICE_CAP_IOMMU_TYPE 0 +#define HOST_IOMMU_DEVICE_CAP_AW_BITS 1 + +#define HOST_IOMMU_DEVICE_CAP_AW_BITS_MAX 64 +#endif diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h index 293bfbe..9edfec6 100644 --- a/include/sysemu/iommufd.h +++ b/include/sysemu/iommufd.h @@ -1,9 +1,23 @@ +/* + * iommufd container backend declaration + * + * Copyright (C) 2024 Intel Corporation. + * Copyright Red Hat, Inc. 2024 + * + * Authors: Yi Liu <yi.l.liu@intel.com> + * Eric Auger <eric.auger@redhat.com> + * Zhenzhong Duan <zhenzhong.duan@intel.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + #ifndef SYSEMU_IOMMUFD_H #define SYSEMU_IOMMUFD_H #include "qom/object.h" #include "exec/hwaddr.h" #include "exec/cpu-common.h" +#include "sysemu/host_iommu_device.h" #define TYPE_IOMMUFD_BACKEND "iommufd" OBJECT_DECLARE_TYPE(IOMMUFDBackend, IOMMUFDBackendClass, IOMMUFD_BACKEND) @@ -33,4 +47,9 @@ int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, ram_addr_t size, void *vaddr, bool readonly); int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, ram_addr_t size); +bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid, + uint32_t *type, void *data, uint32_t len, + Error **errp); + +#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd" #endif diff --git a/system/memory.c b/system/memory.c index 47c600d..2d69521 100644 --- a/system/memory.c +++ b/system/memory.c @@ -1914,19 +1914,6 @@ int memory_region_iommu_set_page_size_mask(IOMMUMemoryRegion *iommu_mr, return ret; } -int memory_region_iommu_set_iova_ranges(IOMMUMemoryRegion *iommu_mr, - GList *iova_ranges, - Error **errp) -{ - IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr); - int ret = 0; - - if (imrc->iommu_set_iova_ranges) { - ret = imrc->iommu_set_iova_ranges(iommu_mr, iova_ranges, errp); - } - return ret; -} - int memory_region_register_iommu_notifier(MemoryRegion *mr, IOMMUNotifier *n, Error **errp) { diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index 69a08e8..a2640cf 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -1779,7 +1779,9 @@ static int priv_spec_from_str(const char *priv_spec_str) { int priv_version = -1; - if (!g_strcmp0(priv_spec_str, PRIV_VER_1_12_0_STR)) { + if (!g_strcmp0(priv_spec_str, PRIV_VER_1_13_0_STR)) { + priv_version = PRIV_VERSION_1_13_0; + } else if (!g_strcmp0(priv_spec_str, PRIV_VER_1_12_0_STR)) { priv_version = PRIV_VERSION_1_12_0; } else if (!g_strcmp0(priv_spec_str, PRIV_VER_1_11_0_STR)) { priv_version = PRIV_VERSION_1_11_0; @@ -1790,7 +1792,7 @@ static int priv_spec_from_str(const char *priv_spec_str) return priv_version; } -static const char *priv_spec_to_str(int priv_version) +const char *priv_spec_to_str(int priv_version) { switch (priv_version) { case PRIV_VERSION_1_10_0: @@ -1799,6 +1801,8 @@ static const char *priv_spec_to_str(int priv_version) return PRIV_VER_1_11_0_STR; case PRIV_VERSION_1_12_0: return PRIV_VER_1_12_0_STR; + case PRIV_VERSION_1_13_0: + return PRIV_VER_1_13_0_STR; default: return NULL; } @@ -2246,6 +2250,402 @@ RISCVCPUProfile *riscv_profiles[] = { NULL, }; +static RISCVCPUImpliedExtsRule RVA_IMPLIED = { + .is_misa = true, + .ext = RVA, + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zalrsc), CPU_CFG_OFFSET(ext_zaamo), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule RVD_IMPLIED = { + .is_misa = true, + .ext = RVD, + .implied_misa_exts = RVF, + .implied_multi_exts = { RISCV_IMPLIED_EXTS_RULE_END }, +}; + +static RISCVCPUImpliedExtsRule RVF_IMPLIED = { + .is_misa = true, + .ext = RVF, + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zicsr), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule RVM_IMPLIED = { + .is_misa = true, + .ext = RVM, + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zmmul), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule RVV_IMPLIED = { + .is_misa = true, + .ext = RVV, + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zve64d), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZCB_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zcb), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zca), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZCD_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zcd), + .implied_misa_exts = RVD, + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zca), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZCE_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zce), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zcb), CPU_CFG_OFFSET(ext_zcmp), + CPU_CFG_OFFSET(ext_zcmt), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZCF_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zcf), + .implied_misa_exts = RVF, + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zca), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZCMP_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zcmp), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zca), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZCMT_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zcmt), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zca), CPU_CFG_OFFSET(ext_zicsr), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZDINX_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zdinx), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zfinx), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZFA_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zfa), + .implied_misa_exts = RVF, + .implied_multi_exts = { RISCV_IMPLIED_EXTS_RULE_END }, +}; + +static RISCVCPUImpliedExtsRule ZFBFMIN_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zfbfmin), + .implied_misa_exts = RVF, + .implied_multi_exts = { RISCV_IMPLIED_EXTS_RULE_END }, +}; + +static RISCVCPUImpliedExtsRule ZFH_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zfh), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zfhmin), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZFHMIN_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zfhmin), + .implied_misa_exts = RVF, + .implied_multi_exts = { RISCV_IMPLIED_EXTS_RULE_END }, +}; + +static RISCVCPUImpliedExtsRule ZFINX_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zfinx), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zicsr), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZHINX_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zhinx), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zhinxmin), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZHINXMIN_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zhinxmin), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zfinx), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZICNTR_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zicntr), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zicsr), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZIHPM_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zihpm), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zicsr), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZK_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zk), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zkn), CPU_CFG_OFFSET(ext_zkr), + CPU_CFG_OFFSET(ext_zkt), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZKN_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zkn), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zbkb), CPU_CFG_OFFSET(ext_zbkc), + CPU_CFG_OFFSET(ext_zbkx), CPU_CFG_OFFSET(ext_zkne), + CPU_CFG_OFFSET(ext_zknd), CPU_CFG_OFFSET(ext_zknh), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZKS_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zks), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zbkb), CPU_CFG_OFFSET(ext_zbkc), + CPU_CFG_OFFSET(ext_zbkx), CPU_CFG_OFFSET(ext_zksed), + CPU_CFG_OFFSET(ext_zksh), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVBB_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zvbb), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zvkb), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVE32F_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zve32f), + .implied_misa_exts = RVF, + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zve32x), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVE32X_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zve32x), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zicsr), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVE64D_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zve64d), + .implied_misa_exts = RVD, + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zve64f), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVE64F_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zve64f), + .implied_misa_exts = RVF, + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zve32f), CPU_CFG_OFFSET(ext_zve64x), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVE64X_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zve64x), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zve32x), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVFBFMIN_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zvfbfmin), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zve32f), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVFBFWMA_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zvfbfwma), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zvfbfmin), CPU_CFG_OFFSET(ext_zfbfmin), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVFH_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zvfh), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zvfhmin), CPU_CFG_OFFSET(ext_zfhmin), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVFHMIN_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zvfhmin), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zve32f), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVKN_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zvkn), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zvkned), CPU_CFG_OFFSET(ext_zvknhb), + CPU_CFG_OFFSET(ext_zvkb), CPU_CFG_OFFSET(ext_zvkt), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVKNC_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zvknc), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zvkn), CPU_CFG_OFFSET(ext_zvbc), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVKNG_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zvkng), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zvkn), CPU_CFG_OFFSET(ext_zvkg), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVKNHB_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zvknhb), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zve64x), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVKS_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zvks), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zvksed), CPU_CFG_OFFSET(ext_zvksh), + CPU_CFG_OFFSET(ext_zvkb), CPU_CFG_OFFSET(ext_zvkt), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVKSC_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zvksc), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zvks), CPU_CFG_OFFSET(ext_zvbc), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +static RISCVCPUImpliedExtsRule ZVKSG_IMPLIED = { + .ext = CPU_CFG_OFFSET(ext_zvksg), + .implied_multi_exts = { + CPU_CFG_OFFSET(ext_zvks), CPU_CFG_OFFSET(ext_zvkg), + + RISCV_IMPLIED_EXTS_RULE_END + }, +}; + +RISCVCPUImpliedExtsRule *riscv_misa_ext_implied_rules[] = { + &RVA_IMPLIED, &RVD_IMPLIED, &RVF_IMPLIED, + &RVM_IMPLIED, &RVV_IMPLIED, NULL +}; + +RISCVCPUImpliedExtsRule *riscv_multi_ext_implied_rules[] = { + &ZCB_IMPLIED, &ZCD_IMPLIED, &ZCE_IMPLIED, + &ZCF_IMPLIED, &ZCMP_IMPLIED, &ZCMT_IMPLIED, + &ZDINX_IMPLIED, &ZFA_IMPLIED, &ZFBFMIN_IMPLIED, + &ZFH_IMPLIED, &ZFHMIN_IMPLIED, &ZFINX_IMPLIED, + &ZHINX_IMPLIED, &ZHINXMIN_IMPLIED, &ZICNTR_IMPLIED, + &ZIHPM_IMPLIED, &ZK_IMPLIED, &ZKN_IMPLIED, + &ZKS_IMPLIED, &ZVBB_IMPLIED, &ZVE32F_IMPLIED, + &ZVE32X_IMPLIED, &ZVE64D_IMPLIED, &ZVE64F_IMPLIED, + &ZVE64X_IMPLIED, &ZVFBFMIN_IMPLIED, &ZVFBFWMA_IMPLIED, + &ZVFH_IMPLIED, &ZVFHMIN_IMPLIED, &ZVKN_IMPLIED, + &ZVKNC_IMPLIED, &ZVKNG_IMPLIED, &ZVKNHB_IMPLIED, + &ZVKS_IMPLIED, &ZVKSC_IMPLIED, &ZVKSG_IMPLIED, + NULL +}; + static Property riscv_cpu_properties[] = { DEFINE_PROP_BOOL("debug", RISCVCPU, cfg.debug, true), diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index 6fe0d71..8774204 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -96,12 +96,14 @@ extern RISCVCPUProfile *riscv_profiles[]; #define PRIV_VER_1_10_0_STR "v1.10.0" #define PRIV_VER_1_11_0_STR "v1.11.0" #define PRIV_VER_1_12_0_STR "v1.12.0" +#define PRIV_VER_1_13_0_STR "v1.13.0" enum { PRIV_VERSION_1_10_0 = 0, PRIV_VERSION_1_11_0, PRIV_VERSION_1_12_0, + PRIV_VERSION_1_13_0, - PRIV_VERSION_LATEST = PRIV_VERSION_1_12_0, + PRIV_VERSION_LATEST = PRIV_VERSION_1_13_0, }; #define VEXT_VERSION_1_00_0 0x00010000 @@ -122,6 +124,29 @@ typedef enum { EXT_STATUS_DIRTY, } RISCVExtStatus; +typedef struct riscv_cpu_implied_exts_rule { +#ifndef CONFIG_USER_ONLY + /* + * Bitmask indicates the rule enabled status for the harts. + * This enhancement is only available in system-mode QEMU, + * as we don't have a good way (e.g. mhartid) to distinguish + * the SMP cores in user-mode QEMU. + */ + unsigned long *enabled; +#endif + /* True if this is a MISA implied rule. */ + bool is_misa; + /* ext is MISA bit if is_misa flag is true, else multi extension offset. */ + const uint32_t ext; + const uint32_t implied_misa_exts; + const uint32_t implied_multi_exts[]; +} RISCVCPUImpliedExtsRule; + +extern RISCVCPUImpliedExtsRule *riscv_misa_ext_implied_rules[]; +extern RISCVCPUImpliedExtsRule *riscv_multi_ext_implied_rules[]; + +#define RISCV_IMPLIED_EXTS_RULE_END -1 + #define MMU_USER_IDX 3 #define MAX_RISCV_PMPS (16) @@ -830,4 +855,5 @@ const char *satp_mode_str(uint8_t satp_mode, bool is_32_bit); /* Implemented in th_csr.c */ void th_register_custom_csrs(RISCVCPU *cpu); +const char *priv_spec_to_str(int priv_version); #endif /* RISCV_CPU_H */ diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h index 74318a9..c257c5e 100644 --- a/target/riscv/cpu_bits.h +++ b/target/riscv/cpu_bits.h @@ -156,6 +156,8 @@ /* 32-bit only */ #define CSR_MSTATUSH 0x310 +#define CSR_MEDELEGH 0x312 +#define CSR_HEDELEGH 0x612 /* Machine Trap Handling */ #define CSR_MSCRATCH 0x340 @@ -315,6 +317,7 @@ #define SMSTATEEN0_CS (1ULL << 0) #define SMSTATEEN0_FCSR (1ULL << 1) #define SMSTATEEN0_JVT (1ULL << 2) +#define SMSTATEEN0_P1P13 (1ULL << 56) #define SMSTATEEN0_HSCONTXT (1ULL << 57) #define SMSTATEEN0_IMSIC (1ULL << 58) #define SMSTATEEN0_AIA (1ULL << 59) @@ -670,6 +673,8 @@ typedef enum RISCVException { RISCV_EXCP_INST_PAGE_FAULT = 0xc, /* since: priv-1.10.0 */ RISCV_EXCP_LOAD_PAGE_FAULT = 0xd, /* since: priv-1.10.0 */ RISCV_EXCP_STORE_PAGE_FAULT = 0xf, /* since: priv-1.10.0 */ + RISCV_EXCP_SW_CHECK = 0x12, /* since: priv-1.13.0 */ + RISCV_EXCP_HW_ERR = 0x13, /* since: priv-1.13.0 */ RISCV_EXCP_INST_GUEST_PAGE_FAULT = 0x14, RISCV_EXCP_LOAD_GUEST_ACCESS_FAULT = 0x15, RISCV_EXCP_VIRT_INSTRUCTION_FAULT = 0x16, @@ -695,7 +700,8 @@ typedef enum RISCVException { #define IRQ_M_EXT 11 #define IRQ_S_GEXT 12 #define IRQ_PMU_OVF 13 -#define IRQ_LOCAL_MAX 16 +#define IRQ_LOCAL_MAX 64 +/* -1 is due to bit zero of hgeip and hgeie being ROZ. */ #define IRQ_LOCAL_GUEST_MAX (TARGET_LONG_BITS - 1) /* mip masks */ diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h index e1e4f32..fb7eebd 100644 --- a/target/riscv/cpu_cfg.h +++ b/target/riscv/cpu_cfg.h @@ -136,6 +136,7 @@ struct RISCVCPUConfig { * TCG always implement/can't be user disabled, * based on spec version. */ + bool has_priv_1_13; bool has_priv_1_12; bool has_priv_1_11; diff --git a/target/riscv/csr.c b/target/riscv/csr.c index 58ef707..432c59d 100644 --- a/target/riscv/csr.c +++ b/target/riscv/csr.c @@ -762,14 +762,18 @@ static RISCVException write_vcsr(CPURISCVState *env, int csrno, } /* User Timers and Counters */ -static target_ulong get_ticks(bool shift) +static target_ulong get_ticks(bool shift, bool instructions) { int64_t val; target_ulong result; #if !defined(CONFIG_USER_ONLY) if (icount_enabled()) { - val = icount_get(); + if (instructions) { + val = icount_get_raw(); + } else { + val = icount_get(); + } } else { val = cpu_get_host_ticks(); } @@ -804,14 +808,14 @@ static RISCVException read_timeh(CPURISCVState *env, int csrno, static RISCVException read_hpmcounter(CPURISCVState *env, int csrno, target_ulong *val) { - *val = get_ticks(false); + *val = get_ticks(false, (csrno == CSR_INSTRET)); return RISCV_EXCP_NONE; } static RISCVException read_hpmcounterh(CPURISCVState *env, int csrno, target_ulong *val) { - *val = get_ticks(true); + *val = get_ticks(true, (csrno == CSR_INSTRETH)); return RISCV_EXCP_NONE; } @@ -875,11 +879,11 @@ static RISCVException write_mhpmcounter(CPURISCVState *env, int csrno, int ctr_idx = csrno - CSR_MCYCLE; PMUCTRState *counter = &env->pmu_ctrs[ctr_idx]; uint64_t mhpmctr_val = val; + bool instr = riscv_pmu_ctr_monitor_instructions(env, ctr_idx); counter->mhpmcounter_val = val; - if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) || - riscv_pmu_ctr_monitor_instructions(env, ctr_idx)) { - counter->mhpmcounter_prev = get_ticks(false); + if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) || instr) { + counter->mhpmcounter_prev = get_ticks(false, instr); if (ctr_idx > 2) { if (riscv_cpu_mxl(env) == MXL_RV32) { mhpmctr_val = mhpmctr_val | @@ -902,12 +906,12 @@ static RISCVException write_mhpmcounterh(CPURISCVState *env, int csrno, PMUCTRState *counter = &env->pmu_ctrs[ctr_idx]; uint64_t mhpmctr_val = counter->mhpmcounter_val; uint64_t mhpmctrh_val = val; + bool instr = riscv_pmu_ctr_monitor_instructions(env, ctr_idx); counter->mhpmcounterh_val = val; mhpmctr_val = mhpmctr_val | (mhpmctrh_val << 32); - if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) || - riscv_pmu_ctr_monitor_instructions(env, ctr_idx)) { - counter->mhpmcounterh_prev = get_ticks(true); + if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) || instr) { + counter->mhpmcounterh_prev = get_ticks(true, instr); if (ctr_idx > 2) { riscv_pmu_setup_timer(env, mhpmctr_val, ctr_idx); } @@ -926,6 +930,7 @@ static RISCVException riscv_pmu_read_ctr(CPURISCVState *env, target_ulong *val, counter->mhpmcounter_prev; target_ulong ctr_val = upper_half ? counter->mhpmcounterh_val : counter->mhpmcounter_val; + bool instr = riscv_pmu_ctr_monitor_instructions(env, ctr_idx); if (get_field(env->mcountinhibit, BIT(ctr_idx))) { /* @@ -946,9 +951,8 @@ static RISCVException riscv_pmu_read_ctr(CPURISCVState *env, target_ulong *val, * The kernel computes the perf delta by subtracting the current value from * the value it initialized previously (ctr_val). */ - if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) || - riscv_pmu_ctr_monitor_instructions(env, ctr_idx)) { - *val = get_ticks(upper_half) - ctr_prev + ctr_val; + if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) || instr) { + *val = get_ticks(upper_half, instr) - ctr_prev + ctr_val; } else { *val = ctr_val; } @@ -1145,7 +1149,14 @@ static RISCVException write_stimecmph(CPURISCVState *env, int csrno, #define VSTOPI_NUM_SRCS 5 -#define LOCAL_INTERRUPTS (~0x1FFF) +/* + * All core local interrupts except the fixed ones 0:12. This macro is for + * virtual interrupts logic so please don't change this to avoid messing up + * the whole support, For reference see AIA spec: `5.3 Interrupt filtering and + * virtual interrupts for supervisor level` and `6.3.2 Virtual interrupts for + * VS level`. + */ +#define LOCAL_INTERRUPTS (~0x1FFFULL) static const uint64_t delegable_ints = S_MODE_INTERRUPTS | VS_MODE_INTERRUPTS | MIP_LCOFIP; @@ -1197,18 +1208,18 @@ static const target_ulong sstatus_v1_10_mask = SSTATUS_SIE | SSTATUS_SPIE | */ /* Bit STIP can be an alias of mip.STIP that's why it's writable in mvip. */ -static const target_ulong mvip_writable_mask = MIP_SSIP | MIP_STIP | MIP_SEIP | +static const uint64_t mvip_writable_mask = MIP_SSIP | MIP_STIP | MIP_SEIP | LOCAL_INTERRUPTS; -static const target_ulong mvien_writable_mask = MIP_SSIP | MIP_SEIP | +static const uint64_t mvien_writable_mask = MIP_SSIP | MIP_SEIP | LOCAL_INTERRUPTS; -static const target_ulong sip_writable_mask = SIP_SSIP | LOCAL_INTERRUPTS; -static const target_ulong hip_writable_mask = MIP_VSSIP; -static const target_ulong hvip_writable_mask = MIP_VSSIP | MIP_VSTIP | +static const uint64_t sip_writable_mask = SIP_SSIP | LOCAL_INTERRUPTS; +static const uint64_t hip_writable_mask = MIP_VSSIP; +static const uint64_t hvip_writable_mask = MIP_VSSIP | MIP_VSTIP | MIP_VSEIP | LOCAL_INTERRUPTS; -static const target_ulong hvien_writable_mask = LOCAL_INTERRUPTS; +static const uint64_t hvien_writable_mask = LOCAL_INTERRUPTS; -static const target_ulong vsip_writable_mask = MIP_VSSIP | LOCAL_INTERRUPTS; +static const uint64_t vsip_writable_mask = MIP_VSSIP | LOCAL_INTERRUPTS; const bool valid_vm_1_10_32[16] = { [VM_1_10_MBARE] = true, @@ -2245,6 +2256,10 @@ static RISCVException write_mstateen0(CPURISCVState *env, int csrno, wr_mask |= SMSTATEEN0_FCSR; } + if (env->priv_ver >= PRIV_VERSION_1_13_0) { + wr_mask |= SMSTATEEN0_P1P13; + } + return write_mstateen(env, csrno, wr_mask, new_val); } @@ -2280,6 +2295,10 @@ static RISCVException write_mstateen0h(CPURISCVState *env, int csrno, { uint64_t wr_mask = SMSTATEEN_STATEEN | SMSTATEEN0_HSENVCFG; + if (env->priv_ver >= PRIV_VERSION_1_13_0) { + wr_mask |= SMSTATEEN0_P1P13; + } + return write_mstateenh(env, csrno, wr_mask, new_val); } @@ -3214,6 +3233,33 @@ static RISCVException write_hedeleg(CPURISCVState *env, int csrno, return RISCV_EXCP_NONE; } +static RISCVException read_hedelegh(CPURISCVState *env, int csrno, + target_ulong *val) +{ + RISCVException ret; + ret = smstateen_acc_ok(env, 0, SMSTATEEN0_P1P13); + if (ret != RISCV_EXCP_NONE) { + return ret; + } + + /* Reserved, now read zero */ + *val = 0; + return RISCV_EXCP_NONE; +} + +static RISCVException write_hedelegh(CPURISCVState *env, int csrno, + target_ulong val) +{ + RISCVException ret; + ret = smstateen_acc_ok(env, 0, SMSTATEEN0_P1P13); + if (ret != RISCV_EXCP_NONE) { + return ret; + } + + /* Reserved, now write ignore */ + return RISCV_EXCP_NONE; +} + static RISCVException rmw_hvien64(CPURISCVState *env, int csrno, uint64_t *ret_val, uint64_t new_val, uint64_t wr_mask) @@ -4618,6 +4664,10 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { [CSR_MSTATUSH] = { "mstatush", any32, read_mstatush, write_mstatush }, + [CSR_MEDELEGH] = { "medelegh", any32, read_zero, write_ignore, + .min_priv_ver = PRIV_VERSION_1_13_0 }, + [CSR_HEDELEGH] = { "hedelegh", hmode32, read_hedelegh, write_hedelegh, + .min_priv_ver = PRIV_VERSION_1_13_0 }, /* Machine Trap Handling */ [CSR_MSCRATCH] = { "mscratch", any, read_mscratch, write_mscratch, diff --git a/target/riscv/debug.c b/target/riscv/debug.c index b110370..0b5099f 100644 --- a/target/riscv/debug.c +++ b/target/riscv/debug.c @@ -241,6 +241,76 @@ static void do_trigger_action(CPURISCVState *env, target_ulong trigger_index) } } +/* + * Check the privilege level of specific trigger matches CPU's current privilege + * level. + */ +static bool trigger_priv_match(CPURISCVState *env, trigger_type_t type, + int trigger_index) +{ + target_ulong ctrl = env->tdata1[trigger_index]; + + switch (type) { + case TRIGGER_TYPE_AD_MATCH: + /* type 2 trigger cannot be fired in VU/VS mode */ + if (env->virt_enabled) { + return false; + } + /* check U/S/M bit against current privilege level */ + if ((ctrl >> 3) & BIT(env->priv)) { + return true; + } + break; + case TRIGGER_TYPE_AD_MATCH6: + if (env->virt_enabled) { + /* check VU/VS bit against current privilege level */ + if ((ctrl >> 23) & BIT(env->priv)) { + return true; + } + } else { + /* check U/S/M bit against current privilege level */ + if ((ctrl >> 3) & BIT(env->priv)) { + return true; + } + } + break; + case TRIGGER_TYPE_INST_CNT: + if (env->virt_enabled) { + /* check VU/VS bit against current privilege level */ + if ((ctrl >> 25) & BIT(env->priv)) { + return true; + } + } else { + /* check U/S/M bit against current privilege level */ + if ((ctrl >> 6) & BIT(env->priv)) { + return true; + } + } + break; + case TRIGGER_TYPE_INT: + case TRIGGER_TYPE_EXCP: + case TRIGGER_TYPE_EXT_SRC: + qemu_log_mask(LOG_UNIMP, "trigger type: %d is not supported\n", type); + break; + case TRIGGER_TYPE_NO_EXIST: + case TRIGGER_TYPE_UNAVAIL: + qemu_log_mask(LOG_GUEST_ERROR, "trigger type: %d does not exist\n", + type); + break; + default: + g_assert_not_reached(); + } + + return false; +} + +/* Common matching conditions for all types of the triggers. */ +static bool trigger_common_match(CPURISCVState *env, trigger_type_t type, + int trigger_index) +{ + return trigger_priv_match(env, type, trigger_index); +} + /* type 2 trigger */ static uint32_t type2_breakpoint_size(CPURISCVState *env, target_ulong ctrl) @@ -554,7 +624,7 @@ void helper_itrigger_match(CPURISCVState *env) if (get_trigger_type(env, i) != TRIGGER_TYPE_INST_CNT) { continue; } - if (check_itrigger_priv(env, i)) { + if (!trigger_common_match(env, TRIGGER_TYPE_INST_CNT, i)) { continue; } count = itrigger_get_count(env, i); @@ -785,22 +855,18 @@ bool riscv_cpu_debug_check_breakpoint(CPUState *cs) for (i = 0; i < RV_MAX_TRIGGERS; i++) { trigger_type = get_trigger_type(env, i); + if (!trigger_common_match(env, trigger_type, i)) { + continue; + } + switch (trigger_type) { case TRIGGER_TYPE_AD_MATCH: - /* type 2 trigger cannot be fired in VU/VS mode */ - if (env->virt_enabled) { - return false; - } - ctrl = env->tdata1[i]; pc = env->tdata2[i]; if ((ctrl & TYPE2_EXEC) && (bp->pc == pc)) { - /* check U/S/M bit against current privilege level */ - if ((ctrl >> 3) & BIT(env->priv)) { - env->badaddr = pc; - return true; - } + env->badaddr = pc; + return true; } break; case TRIGGER_TYPE_AD_MATCH6: @@ -808,19 +874,8 @@ bool riscv_cpu_debug_check_breakpoint(CPUState *cs) pc = env->tdata2[i]; if ((ctrl & TYPE6_EXEC) && (bp->pc == pc)) { - if (env->virt_enabled) { - /* check VU/VS bit against current privilege level */ - if ((ctrl >> 23) & BIT(env->priv)) { - env->badaddr = pc; - return true; - } - } else { - /* check U/S/M bit against current privilege level */ - if ((ctrl >> 3) & BIT(env->priv)) { - env->badaddr = pc; - return true; - } - } + env->badaddr = pc; + return true; } break; default: @@ -846,13 +901,12 @@ bool riscv_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp) for (i = 0; i < RV_MAX_TRIGGERS; i++) { trigger_type = get_trigger_type(env, i); + if (!trigger_common_match(env, trigger_type, i)) { + continue; + } + switch (trigger_type) { case TRIGGER_TYPE_AD_MATCH: - /* type 2 trigger cannot be fired in VU/VS mode */ - if (env->virt_enabled) { - return false; - } - ctrl = env->tdata1[i]; addr = env->tdata2[i]; flags = 0; @@ -865,10 +919,7 @@ bool riscv_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp) } if ((wp->flags & flags) && (wp->vaddr == addr)) { - /* check U/S/M bit against current privilege level */ - if ((ctrl >> 3) & BIT(env->priv)) { - return true; - } + return true; } break; case TRIGGER_TYPE_AD_MATCH6: @@ -884,17 +935,7 @@ bool riscv_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp) } if ((wp->flags & flags) && (wp->vaddr == addr)) { - if (env->virt_enabled) { - /* check VU/VS bit against current privilege level */ - if ((ctrl >> 23) & BIT(env->priv)) { - return true; - } - } else { - /* check U/S/M bit against current privilege level */ - if ((ctrl >> 3) & BIT(env->priv)) { - return true; - } - } + return true; } break; default: diff --git a/target/riscv/fpu_helper.c b/target/riscv/fpu_helper.c index 871a70a..91b1a56 100644 --- a/target/riscv/fpu_helper.c +++ b/target/riscv/fpu_helper.c @@ -676,7 +676,7 @@ uint64_t helper_fround_h(CPURISCVState *env, uint64_t rs1) uint64_t helper_froundnx_h(CPURISCVState *env, uint64_t rs1) { - float16 frs1 = check_nanbox_s(env, rs1); + float16 frs1 = check_nanbox_h(env, rs1); frs1 = float16_round_to_int(frs1, &env->fp_status); return nanbox_h(env, frs1); } diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c index 235e2cd..1047961 100644 --- a/target/riscv/kvm/kvm-cpu.c +++ b/target/riscv/kvm/kvm-cpu.c @@ -1555,6 +1555,21 @@ static int kvm_riscv_handle_csr(CPUState *cs, struct kvm_run *run) return ret; } +static bool kvm_riscv_handle_debug(CPUState *cs) +{ + RISCVCPU *cpu = RISCV_CPU(cs); + CPURISCVState *env = &cpu->env; + + /* Ensure PC is synchronised */ + kvm_cpu_synchronize_state(cs); + + if (kvm_find_sw_breakpoint(cs, env->pc)) { + return true; + } + + return false; +} + int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) { int ret = 0; @@ -1565,6 +1580,11 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) case KVM_EXIT_RISCV_CSR: ret = kvm_riscv_handle_csr(cs, run); break; + case KVM_EXIT_DEBUG: + if (kvm_riscv_handle_debug(cs)) { + ret = EXCP_DEBUG; + } + break; default: qemu_log_mask(LOG_UNIMP, "%s: un-handled exit reason %d\n", __func__, run->exit_reason); @@ -1969,3 +1989,72 @@ static const TypeInfo riscv_kvm_cpu_type_infos[] = { }; DEFINE_TYPES(riscv_kvm_cpu_type_infos) + +static const uint32_t ebreak_insn = 0x00100073; +static const uint16_t c_ebreak_insn = 0x9002; + +int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) +{ + if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 2, 0)) { + return -EINVAL; + } + + if ((bp->saved_insn & 0x3) == 0x3) { + if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 0) + || cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&ebreak_insn, 4, 1)) { + return -EINVAL; + } + } else { + if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&c_ebreak_insn, 2, 1)) { + return -EINVAL; + } + } + + return 0; +} + +int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) +{ + uint32_t ebreak; + uint16_t c_ebreak; + + if ((bp->saved_insn & 0x3) == 0x3) { + if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&ebreak, 4, 0) || + ebreak != ebreak_insn || + cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 1)) { + return -EINVAL; + } + } else { + if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&c_ebreak, 2, 0) || + c_ebreak != c_ebreak_insn || + cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 2, 1)) { + return -EINVAL; + } + } + + return 0; +} + +int kvm_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type) +{ + /* TODO; To be implemented later. */ + return -EINVAL; +} + +int kvm_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type) +{ + /* TODO; To be implemented later. */ + return -EINVAL; +} + +void kvm_arch_remove_all_hw_breakpoints(void) +{ + /* TODO; To be implemented later. */ +} + +void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg) +{ + if (kvm_sw_breakpoints_active(cs)) { + dbg->control |= KVM_GUESTDBG_ENABLE; + } +} diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c index 683f604..ae25686 100644 --- a/target/riscv/tcg/tcg-cpu.c +++ b/target/riscv/tcg/tcg-cpu.c @@ -31,11 +31,17 @@ #include "hw/core/accel-cpu.h" #include "hw/core/tcg-cpu-ops.h" #include "tcg/tcg.h" +#ifndef CONFIG_USER_ONLY +#include "hw/boards.h" +#endif /* Hash that stores user set extensions */ static GHashTable *multi_ext_user_opts; static GHashTable *misa_ext_user_opts; +static GHashTable *multi_ext_implied_rules; +static GHashTable *misa_ext_implied_rules; + static bool cpu_cfg_ext_is_user_set(uint32_t ext_offset) { return g_hash_table_contains(multi_ext_user_opts, @@ -76,16 +82,11 @@ static void riscv_cpu_write_misa_bit(RISCVCPU *cpu, uint32_t bit, static const char *cpu_priv_ver_to_str(int priv_ver) { - switch (priv_ver) { - case PRIV_VERSION_1_10_0: - return "v1.10.0"; - case PRIV_VERSION_1_11_0: - return "v1.11.0"; - case PRIV_VERSION_1_12_0: - return "v1.12.0"; - } + const char *priv_spec_str = priv_spec_to_str(priv_ver); - g_assert_not_reached(); + g_assert(priv_spec_str); + + return priv_spec_str; } static void riscv_cpu_synchronize_from_tb(CPUState *cs, @@ -323,6 +324,10 @@ static void riscv_cpu_update_named_features(RISCVCPU *cpu) cpu->cfg.has_priv_1_12 = true; } + if (cpu->env.priv_ver >= PRIV_VERSION_1_13_0) { + cpu->cfg.has_priv_1_13 = true; + } + /* zic64b is 1.12 or later */ cpu->cfg.ext_zic64b = cpu->cfg.cbom_blocksize == 64 && cpu->cfg.cbop_blocksize == 64 && @@ -466,10 +471,6 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) return; } - if (cpu->cfg.ext_zfh) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zfhmin), true); - } - if (cpu->cfg.ext_zfhmin && !riscv_has_ext(env, RVF)) { error_setg(errp, "Zfh/Zfhmin extensions require F extension"); return; @@ -491,9 +492,6 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) error_propagate(errp, local_err); return; } - - /* The V vector extension depends on the Zve64d extension */ - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zve64d), true); } /* The Zve64d extension depends on the Zve64f extension */ @@ -502,18 +500,6 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) error_setg(errp, "Zve64d/V extensions require D extension"); return; } - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zve64f), true); - } - - /* The Zve64f extension depends on the Zve64x and Zve32f extensions */ - if (cpu->cfg.ext_zve64f) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zve64x), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zve32f), true); - } - - /* The Zve64x extension depends on the Zve32x extension */ - if (cpu->cfg.ext_zve64x) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zve32x), true); } /* The Zve32f extension depends on the Zve32x extension */ @@ -522,11 +508,6 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) error_setg(errp, "Zve32f/Zve64f extensions require F extension"); return; } - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zve32x), true); - } - - if (cpu->cfg.ext_zvfh) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvfhmin), true); } if (cpu->cfg.ext_zvfhmin && !cpu->cfg.ext_zve32f) { @@ -549,11 +530,6 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) return; } - /* Set the ISA extensions, checks should have happened above */ - if (cpu->cfg.ext_zhinx) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zca), true); - } - if ((cpu->cfg.ext_zdinx || cpu->cfg.ext_zhinxmin) && !cpu->cfg.ext_zfinx) { error_setg(errp, "Zdinx/Zhinx/Zhinxmin extensions require Zfinx"); return; @@ -571,27 +547,6 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) } } - if (cpu->cfg.ext_zce) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zca), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcb), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcmp), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcmt), true); - if (riscv_has_ext(env, RVF) && mcc->misa_mxl_max == MXL_RV32) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcf), true); - } - } - - /* zca, zcd and zcf has a PRIV 1.12.0 restriction */ - if (riscv_has_ext(env, RVC) && env->priv_ver >= PRIV_VERSION_1_12_0) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zca), true); - if (riscv_has_ext(env, RVF) && mcc->misa_mxl_max == MXL_RV32) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcf), true); - } - if (riscv_has_ext(env, RVD)) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcd), true); - } - } - if (mcc->misa_mxl_max != MXL_RV32 && cpu->cfg.ext_zcf) { error_setg(errp, "Zcf extension is only relevant to RV32"); return; @@ -625,48 +580,6 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) return; } - /* - * Shorthand vector crypto extensions - */ - if (cpu->cfg.ext_zvknc) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkn), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvbc), true); - } - - if (cpu->cfg.ext_zvkng) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkn), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkg), true); - } - - if (cpu->cfg.ext_zvkn) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkned), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvknhb), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkb), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkt), true); - } - - if (cpu->cfg.ext_zvksc) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvks), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvbc), true); - } - - if (cpu->cfg.ext_zvksg) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvks), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkg), true); - } - - if (cpu->cfg.ext_zvks) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvksed), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvksh), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkb), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvkt), true); - } - - if (cpu->cfg.ext_zvkt) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvbb), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvbc), true); - } - if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkb || cpu->cfg.ext_zvkg || cpu->cfg.ext_zvkned || cpu->cfg.ext_zvknha || cpu->cfg.ext_zvksed || cpu->cfg.ext_zvksh) && !cpu->cfg.ext_zve32x) { @@ -682,29 +595,6 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) return; } - if (cpu->cfg.ext_zk) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zkn), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zkr), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zkt), true); - } - - if (cpu->cfg.ext_zkn) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zbkb), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zbkc), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zbkx), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zkne), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zknd), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zknh), true); - } - - if (cpu->cfg.ext_zks) { - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zbkb), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zbkc), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zbkx), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zksed), true); - cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zksh), true); - } - if (cpu->cfg.ext_zicntr && !cpu->cfg.ext_zicsr) { if (cpu_cfg_ext_is_user_set(CPU_CFG_OFFSET(ext_zicntr))) { error_setg(errp, "zicntr requires zicsr"); @@ -833,11 +723,151 @@ static void riscv_cpu_validate_profiles(RISCVCPU *cpu) } } +static void riscv_cpu_init_implied_exts_rules(void) +{ + RISCVCPUImpliedExtsRule *rule; +#ifndef CONFIG_USER_ONLY + MachineState *ms = MACHINE(qdev_get_machine()); +#endif + static bool initialized; + int i; + + /* Implied rules only need to be initialized once. */ + if (initialized) { + return; + } + + for (i = 0; (rule = riscv_misa_ext_implied_rules[i]); i++) { +#ifndef CONFIG_USER_ONLY + rule->enabled = bitmap_new(ms->smp.cpus); +#endif + g_hash_table_insert(misa_ext_implied_rules, + GUINT_TO_POINTER(rule->ext), (gpointer)rule); + } + + for (i = 0; (rule = riscv_multi_ext_implied_rules[i]); i++) { +#ifndef CONFIG_USER_ONLY + rule->enabled = bitmap_new(ms->smp.cpus); +#endif + g_hash_table_insert(multi_ext_implied_rules, + GUINT_TO_POINTER(rule->ext), (gpointer)rule); + } + + initialized = true; +} + +static void cpu_enable_implied_rule(RISCVCPU *cpu, + RISCVCPUImpliedExtsRule *rule) +{ + CPURISCVState *env = &cpu->env; + RISCVCPUImpliedExtsRule *ir; + bool enabled = false; + int i; + +#ifndef CONFIG_USER_ONLY + enabled = test_bit(cpu->env.mhartid, rule->enabled); +#endif + + if (!enabled) { + /* Enable the implied MISAs. */ + if (rule->implied_misa_exts) { + riscv_cpu_set_misa_ext(env, + env->misa_ext | rule->implied_misa_exts); + + for (i = 0; misa_bits[i] != 0; i++) { + if (rule->implied_misa_exts & misa_bits[i]) { + ir = g_hash_table_lookup(misa_ext_implied_rules, + GUINT_TO_POINTER(misa_bits[i])); + + if (ir) { + cpu_enable_implied_rule(cpu, ir); + } + } + } + } + + /* Enable the implied extensions. */ + for (i = 0; + rule->implied_multi_exts[i] != RISCV_IMPLIED_EXTS_RULE_END; i++) { + cpu_cfg_ext_auto_update(cpu, rule->implied_multi_exts[i], true); + + ir = g_hash_table_lookup(multi_ext_implied_rules, + GUINT_TO_POINTER( + rule->implied_multi_exts[i])); + + if (ir) { + cpu_enable_implied_rule(cpu, ir); + } + } + +#ifndef CONFIG_USER_ONLY + bitmap_set(rule->enabled, cpu->env.mhartid, 1); +#endif + } +} + +/* Zc extension has special implied rules that need to be handled separately. */ +static void cpu_enable_zc_implied_rules(RISCVCPU *cpu) +{ + RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(cpu); + CPURISCVState *env = &cpu->env; + + if (cpu->cfg.ext_zce) { + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zca), true); + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcb), true); + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcmp), true); + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcmt), true); + + if (riscv_has_ext(env, RVF) && mcc->misa_mxl_max == MXL_RV32) { + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcf), true); + } + } + + /* Zca, Zcd and Zcf has a PRIV 1.12.0 restriction */ + if (riscv_has_ext(env, RVC) && env->priv_ver >= PRIV_VERSION_1_12_0) { + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zca), true); + + if (riscv_has_ext(env, RVF) && mcc->misa_mxl_max == MXL_RV32) { + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcf), true); + } + + if (riscv_has_ext(env, RVD)) { + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcd), true); + } + } +} + +static void riscv_cpu_enable_implied_rules(RISCVCPU *cpu) +{ + RISCVCPUImpliedExtsRule *rule; + int i; + + /* Enable the implied extensions for Zc. */ + cpu_enable_zc_implied_rules(cpu); + + /* Enable the implied MISAs. */ + for (i = 0; (rule = riscv_misa_ext_implied_rules[i]); i++) { + if (riscv_has_ext(&cpu->env, rule->ext)) { + cpu_enable_implied_rule(cpu, rule); + } + } + + /* Enable the implied extensions. */ + for (i = 0; (rule = riscv_multi_ext_implied_rules[i]); i++) { + if (isa_ext_is_enabled(cpu, rule->ext)) { + cpu_enable_implied_rule(cpu, rule); + } + } +} + void riscv_tcg_cpu_finalize_features(RISCVCPU *cpu, Error **errp) { CPURISCVState *env = &cpu->env; Error *local_err = NULL; + riscv_cpu_init_implied_exts_rules(); + riscv_cpu_enable_implied_rules(cpu); + riscv_cpu_validate_misa_priv(env, &local_err); if (local_err != NULL) { error_propagate(errp, local_err); @@ -1343,6 +1373,15 @@ static void riscv_tcg_cpu_instance_init(CPUState *cs) misa_ext_user_opts = g_hash_table_new(NULL, g_direct_equal); multi_ext_user_opts = g_hash_table_new(NULL, g_direct_equal); + + if (!misa_ext_implied_rules) { + misa_ext_implied_rules = g_hash_table_new(NULL, g_direct_equal); + } + + if (!multi_ext_implied_rules) { + multi_ext_implied_rules = g_hash_table_new(NULL, g_direct_equal); + } + riscv_cpu_add_user_properties(obj); if (riscv_cpu_has_max_extensions(obj)) { |