diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2018-02-07 14:38:53 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2018-02-07 14:38:53 +0000 |
commit | ea62da0913d20338b8a47bbfaef2e8f2763ee13f (patch) | |
tree | 916e3f25f4bf7799c77a9045e8097ccb59ecb6a1 | |
parent | 0833df03f4206a6cf416fbb3d380fa95c8e61fba (diff) | |
parent | db32d0f43839627f54a1a7f8eee17baa770f52d2 (diff) | |
download | qemu-ea62da0913d20338b8a47bbfaef2e8f2763ee13f.zip qemu-ea62da0913d20338b8a47bbfaef2e8f2763ee13f.tar.gz qemu-ea62da0913d20338b8a47bbfaef2e8f2763ee13f.tar.bz2 |
Merge remote-tracking branch 'remotes/awilliam/tags/vfio-update-20180206.0' into staging
VFIO updates 2018-02-06
- SPAPR in-kernel TCE acceleration (Alexey Kardashevskiy)
- MSI-X relocation (Alex Williamson)
- Add missing platform mutex init (Eric Auger)
- Redundant variable cleanup (Alexey Kardashevskiy)
- Option to disable GeForce quirks (Alex Williamson)
# gpg: Signature made Tue 06 Feb 2018 18:21:22 GMT
# gpg: using RSA key 239B9B6E3BB08B22
# gpg: Good signature from "Alex Williamson <alex.williamson@redhat.com>"
# gpg: aka "Alex Williamson <alex@shazbot.org>"
# gpg: aka "Alex Williamson <alwillia@redhat.com>"
# gpg: aka "Alex Williamson <alex.l.williamson@gmail.com>"
# Primary key fingerprint: 42F6 C04E 540B D1A9 9E7B 8A90 239B 9B6E 3BB0 8B22
* remotes/awilliam/tags/vfio-update-20180206.0:
vfio/pci: Add option to disable GeForce quirks
vfio/common: Remove redundant copy of local variable
hw/vfio/platform: Init the interrupt mutex
vfio/pci: Allow relocating MSI-X MMIO
qapi: Create DEFINE_PROP_OFF_AUTO_PCIBAR
vfio/pci: Emulate BARs
vfio/pci: Add base BAR MemoryRegion
vfio/pci: Fixup VFIOMSIXInfo comment
spapr/iommu: Enable in-kernel TCE acceleration via VFIO KVM device
vfio/spapr: Use iommu memory region's get_attr()
memory/iommu: Add get_attr()
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- | hw/core/qdev-properties.c | 11 | ||||
-rw-r--r-- | hw/ppc/spapr_iommu.c | 18 | ||||
-rw-r--r-- | hw/vfio/common.c | 28 | ||||
-rw-r--r-- | hw/vfio/pci-quirks.c | 9 | ||||
-rw-r--r-- | hw/vfio/pci.c | 195 | ||||
-rw-r--r-- | hw/vfio/pci.h | 7 | ||||
-rw-r--r-- | hw/vfio/platform.c | 2 | ||||
-rw-r--r-- | hw/vfio/trace-events | 3 | ||||
-rw-r--r-- | include/exec/memory.h | 22 | ||||
-rw-r--r-- | include/hw/qdev-properties.h | 4 | ||||
-rw-r--r-- | memory.c | 13 | ||||
-rw-r--r-- | qapi/common.json | 26 | ||||
-rw-r--r-- | target/ppc/kvm.c | 7 | ||||
-rw-r--r-- | target/ppc/kvm_ppc.h | 6 |
14 files changed, 320 insertions, 31 deletions
diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c index 24c1780..5bbc2d9 100644 --- a/hw/core/qdev-properties.c +++ b/hw/core/qdev-properties.c @@ -1317,3 +1317,14 @@ const PropertyInfo qdev_prop_link = { .name = "link", .create = create_link_property, }; + +/* --- OffAutoPCIBAR off/auto/bar0/bar1/bar2/bar3/bar4/bar5 --- */ + +const PropertyInfo qdev_prop_off_auto_pcibar = { + .name = "OffAutoPCIBAR", + .description = "off/auto/bar0/bar1/bar2/bar3/bar4/bar5", + .enum_table = &OffAutoPCIBAR_lookup, + .get = get_enum, + .set = set_enum, + .set_default_value = set_default_value_enum, +}; diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index 5ccd785..aaa6010 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -160,6 +160,19 @@ static uint64_t spapr_tce_get_min_page_size(IOMMUMemoryRegion *iommu) return 1ULL << tcet->page_shift; } +static int spapr_tce_get_attr(IOMMUMemoryRegion *iommu, + enum IOMMUMemoryRegionAttr attr, void *data) +{ + sPAPRTCETable *tcet = container_of(iommu, sPAPRTCETable, iommu); + + if (attr == IOMMU_ATTR_SPAPR_TCE_FD && kvmppc_has_cap_spapr_vfio()) { + *(int *) data = tcet->fd; + return 0; + } + + return -EINVAL; +} + static void spapr_tce_notify_flag_changed(IOMMUMemoryRegion *iommu, IOMMUNotifierFlag old, IOMMUNotifierFlag new) @@ -284,6 +297,10 @@ void spapr_tce_set_need_vfio(sPAPRTCETable *tcet, bool need_vfio) tcet->need_vfio = need_vfio; + if (!need_vfio || (tcet->fd != -1 && kvmppc_has_cap_spapr_vfio())) { + return; + } + oldtable = tcet->table; tcet->table = spapr_tce_alloc_table(tcet->liobn, @@ -643,6 +660,7 @@ static void spapr_iommu_memory_region_class_init(ObjectClass *klass, void *data) imrc->translate = spapr_tce_translate_iommu; imrc->get_min_page_size = spapr_tce_get_min_page_size; imrc->notify_flag_changed = spapr_tce_notify_flag_changed; + imrc->get_attr = spapr_tce_get_attr; } static const TypeInfo spapr_iommu_memory_region_info = { diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 
b77be3a..ee9240d 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -435,7 +435,6 @@ static void vfio_listener_region_add(MemoryListener *listener, end = int128_get64(int128_sub(llend, int128_one())); if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { - VFIOHostDMAWindow *hostwin; hwaddr pgsize = 0; /* For now intersections are not allowed, we may relax this later */ @@ -457,6 +456,33 @@ static void vfio_listener_region_add(MemoryListener *listener, vfio_host_win_add(container, section->offset_within_address_space, section->offset_within_address_space + int128_get64(section->size) - 1, pgsize); +#ifdef CONFIG_KVM + if (kvm_enabled()) { + VFIOGroup *group; + IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr); + struct kvm_vfio_spapr_tce param; + struct kvm_device_attr attr = { + .group = KVM_DEV_VFIO_GROUP, + .attr = KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE, + .addr = (uint64_t)(unsigned long)&param, + }; + + if (!memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_SPAPR_TCE_FD, + &param.tablefd)) { + QLIST_FOREACH(group, &container->group_list, container_next) { + param.groupfd = group->fd; + if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) { + error_report("vfio: failed to setup fd %d " + "for a group with fd %d: %s", + param.tablefd, param.groupfd, + strerror(errno)); + return; + } + trace_vfio_spapr_group_attach(param.groupfd, param.tablefd); + } + } + } +#endif } hostwin_found = false; diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index 60ad5fb..e5779a7 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -542,7 +542,8 @@ static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev) VFIOQuirk *quirk; VFIONvidia3d0Quirk *data; - if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) || + if (vdev->no_geforce_quirks || + !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) || !vdev->bars[1].region.size) { return; } @@ -660,7 +661,8 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr) VFIONvidiaBAR5Quirk 
*bar5; VFIOConfigWindowQuirk *window; - if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) || + if (vdev->no_geforce_quirks || + !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) || !vdev->vga || nr != 5 || !vdev->bars[5].ioport) { return; } @@ -754,7 +756,8 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr) VFIOQuirk *quirk; VFIOConfigMirrorQuirk *mirror; - if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) || + if (vdev->no_geforce_quirks || + !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) || !vfio_is_vga(vdev) || nr != 0) { return; } diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 2c71295..879510c 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -1087,7 +1087,7 @@ static void vfio_sub_page_bar_update_mapping(PCIDevice *pdev, int bar) { VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); VFIORegion *region = &vdev->bars[bar].region; - MemoryRegion *mmap_mr, *mr; + MemoryRegion *mmap_mr, *region_mr, *base_mr; PCIIORegion *r; pcibus_t bar_addr; uint64_t size = region->size; @@ -1100,7 +1100,8 @@ static void vfio_sub_page_bar_update_mapping(PCIDevice *pdev, int bar) r = &pdev->io_regions[bar]; bar_addr = r->addr; - mr = region->mem; + base_mr = vdev->bars[bar].mr; + region_mr = region->mem; mmap_mr = &region->mmaps[0].mem; /* If BAR is mapped and page aligned, update to fill PAGE_SIZE */ @@ -1111,12 +1112,15 @@ static void vfio_sub_page_bar_update_mapping(PCIDevice *pdev, int bar) memory_region_transaction_begin(); - memory_region_set_size(mr, size); + if (vdev->bars[bar].size < size) { + memory_region_set_size(base_mr, size); + } + memory_region_set_size(region_mr, size); memory_region_set_size(mmap_mr, size); - if (size != region->size && memory_region_is_mapped(mr)) { - memory_region_del_subregion(r->address_space, mr); + if (size != vdev->bars[bar].size && memory_region_is_mapped(base_mr)) { + memory_region_del_subregion(r->address_space, base_mr); memory_region_add_subregion_overlap(r->address_space, - bar_addr, 
mr, 0); + bar_addr, base_mr, 0); } memory_region_transaction_commit(); @@ -1218,8 +1222,8 @@ void vfio_pci_write_config(PCIDevice *pdev, for (bar = 0; bar < PCI_ROM_SLOT; bar++) { if (old_addr[bar] != pdev->io_regions[bar].addr && - pdev->io_regions[bar].size > 0 && - pdev->io_regions[bar].size < qemu_real_host_page_size) { + vdev->bars[bar].region.size > 0 && + vdev->bars[bar].region.size < qemu_real_host_page_size) { vfio_sub_page_bar_update_mapping(pdev, bar); } } @@ -1352,6 +1356,98 @@ static void vfio_pci_fixup_msix_region(VFIOPCIDevice *vdev) } } +static void vfio_pci_relocate_msix(VFIOPCIDevice *vdev, Error **errp) +{ + int target_bar = -1; + size_t msix_sz; + + if (!vdev->msix || vdev->msix_relo == OFF_AUTOPCIBAR_OFF) { + return; + } + + /* The actual minimum size of MSI-X structures */ + msix_sz = (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE) + + (QEMU_ALIGN_UP(vdev->msix->entries, 64) / 8); + /* Round up to host pages, we don't want to share a page */ + msix_sz = REAL_HOST_PAGE_ALIGN(msix_sz); + /* PCI BARs must be a power of 2 */ + msix_sz = pow2ceil(msix_sz); + + if (vdev->msix_relo == OFF_AUTOPCIBAR_AUTO) { + /* + * TODO: Lookup table for known devices. + * + * Logically we might use an algorithm here to select the BAR adding + * the least additional MMIO space, but we cannot programatically + * predict the driver dependency on BAR ordering or sizing, therefore + * 'auto' becomes a lookup for combinations reported to work. 
+ */ + if (target_bar < 0) { + error_setg(errp, "No automatic MSI-X relocation available for " + "device %04x:%04x", vdev->vendor_id, vdev->device_id); + return; + } + } else { + target_bar = (int)(vdev->msix_relo - OFF_AUTOPCIBAR_BAR0); + } + + /* I/O port BARs cannot host MSI-X structures */ + if (vdev->bars[target_bar].ioport) { + error_setg(errp, "Invalid MSI-X relocation BAR %d, " + "I/O port BAR", target_bar); + return; + } + + /* Cannot use a BAR in the "shadow" of a 64-bit BAR */ + if (!vdev->bars[target_bar].size && + target_bar > 0 && vdev->bars[target_bar - 1].mem64) { + error_setg(errp, "Invalid MSI-X relocation BAR %d, " + "consumed by 64-bit BAR %d", target_bar, target_bar - 1); + return; + } + + /* 2GB max size for 32-bit BARs, cannot double if already > 1G */ + if (vdev->bars[target_bar].size > (1 * 1024 * 1024 * 1024) && + !vdev->bars[target_bar].mem64) { + error_setg(errp, "Invalid MSI-X relocation BAR %d, " + "no space to extend 32-bit BAR", target_bar); + return; + } + + /* + * If adding a new BAR, test if we can make it 64bit. We make it + * prefetchable since QEMU MSI-X emulation has no read side effects + * and doing so makes mapping more flexible. + */ + if (!vdev->bars[target_bar].size) { + if (target_bar < (PCI_ROM_SLOT - 1) && + !vdev->bars[target_bar + 1].size) { + vdev->bars[target_bar].mem64 = true; + vdev->bars[target_bar].type = PCI_BASE_ADDRESS_MEM_TYPE_64; + } + vdev->bars[target_bar].type |= PCI_BASE_ADDRESS_MEM_PREFETCH; + vdev->bars[target_bar].size = msix_sz; + vdev->msix->table_offset = 0; + } else { + vdev->bars[target_bar].size = MAX(vdev->bars[target_bar].size * 2, + msix_sz * 2); + /* + * Due to above size calc, MSI-X always starts halfway into the BAR, + * which will always be a separate host page. 
+ */ + vdev->msix->table_offset = vdev->bars[target_bar].size / 2; + } + + vdev->msix->table_bar = target_bar; + vdev->msix->pba_bar = target_bar; + /* Requires 8-byte alignment, but PCI_MSIX_ENTRY_SIZE guarantees that */ + vdev->msix->pba_offset = vdev->msix->table_offset + + (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE); + + trace_vfio_msix_relo(vdev->vbasedev.name, + vdev->msix->table_bar, vdev->msix->table_offset); +} + /* * We don't have any control over how pci_add_capability() inserts * capabilities into the chain. In order to setup MSI-X we need a @@ -1430,6 +1526,8 @@ static void vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp) vdev->msix = msix; vfio_pci_fixup_msix_region(vdev); + + vfio_pci_relocate_msix(vdev, errp); } static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos, Error **errp) @@ -1440,9 +1538,9 @@ static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos, Error **errp) vdev->msix->pending = g_malloc0(BITS_TO_LONGS(vdev->msix->entries) * sizeof(unsigned long)); ret = msix_init(&vdev->pdev, vdev->msix->entries, - vdev->bars[vdev->msix->table_bar].region.mem, + vdev->bars[vdev->msix->table_bar].mr, vdev->msix->table_bar, vdev->msix->table_offset, - vdev->bars[vdev->msix->pba_bar].region.mem, + vdev->bars[vdev->msix->pba_bar].mr, vdev->msix->pba_bar, vdev->msix->pba_offset, pos, &err); if (ret < 0) { @@ -1482,8 +1580,8 @@ static void vfio_teardown_msi(VFIOPCIDevice *vdev) if (vdev->msix) { msix_uninit(&vdev->pdev, - vdev->bars[vdev->msix->table_bar].region.mem, - vdev->bars[vdev->msix->pba_bar].region.mem); + vdev->bars[vdev->msix->table_bar].mr, + vdev->bars[vdev->msix->pba_bar].mr); g_free(vdev->msix->pending); } } @@ -1500,12 +1598,11 @@ static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled) } } -static void vfio_bar_setup(VFIOPCIDevice *vdev, int nr) +static void vfio_bar_prepare(VFIOPCIDevice *vdev, int nr) { VFIOBAR *bar = &vdev->bars[nr]; uint32_t pci_bar; - uint8_t type; int ret; /* Skip both unimplemented BARs and the 
upper half of 64bit BARS. */ @@ -1524,23 +1621,52 @@ static void vfio_bar_setup(VFIOPCIDevice *vdev, int nr) pci_bar = le32_to_cpu(pci_bar); bar->ioport = (pci_bar & PCI_BASE_ADDRESS_SPACE_IO); bar->mem64 = bar->ioport ? 0 : (pci_bar & PCI_BASE_ADDRESS_MEM_TYPE_64); - type = pci_bar & (bar->ioport ? ~PCI_BASE_ADDRESS_IO_MASK : - ~PCI_BASE_ADDRESS_MEM_MASK); + bar->type = pci_bar & (bar->ioport ? ~PCI_BASE_ADDRESS_IO_MASK : + ~PCI_BASE_ADDRESS_MEM_MASK); + bar->size = bar->region.size; +} + +static void vfio_bars_prepare(VFIOPCIDevice *vdev) +{ + int i; - if (vfio_region_mmap(&bar->region)) { - error_report("Failed to mmap %s BAR %d. Performance may be slow", - vdev->vbasedev.name, nr); + for (i = 0; i < PCI_ROM_SLOT; i++) { + vfio_bar_prepare(vdev, i); } +} + +static void vfio_bar_register(VFIOPCIDevice *vdev, int nr) +{ + VFIOBAR *bar = &vdev->bars[nr]; + char *name; - pci_register_bar(&vdev->pdev, nr, type, bar->region.mem); + if (!bar->size) { + return; + } + + bar->mr = g_new0(MemoryRegion, 1); + name = g_strdup_printf("%s base BAR %d", vdev->vbasedev.name, nr); + memory_region_init_io(bar->mr, OBJECT(vdev), NULL, NULL, name, bar->size); + g_free(name); + + if (bar->region.size) { + memory_region_add_subregion(bar->mr, 0, bar->region.mem); + + if (vfio_region_mmap(&bar->region)) { + error_report("Failed to mmap %s BAR %d. 
Performance may be slow", + vdev->vbasedev.name, nr); + } + } + + pci_register_bar(&vdev->pdev, nr, bar->type, bar->mr); } -static void vfio_bars_setup(VFIOPCIDevice *vdev) +static void vfio_bars_register(VFIOPCIDevice *vdev) { int i; for (i = 0; i < PCI_ROM_SLOT; i++) { - vfio_bar_setup(vdev, i); + vfio_bar_register(vdev, i); } } @@ -1549,8 +1675,13 @@ static void vfio_bars_exit(VFIOPCIDevice *vdev) int i; for (i = 0; i < PCI_ROM_SLOT; i++) { + VFIOBAR *bar = &vdev->bars[i]; + vfio_bar_quirk_exit(vdev, i); - vfio_region_exit(&vdev->bars[i].region); + vfio_region_exit(&bar->region); + if (bar->region.size) { + memory_region_del_subregion(bar->mr, bar->region.mem); + } } if (vdev->vga) { @@ -1564,8 +1695,14 @@ static void vfio_bars_finalize(VFIOPCIDevice *vdev) int i; for (i = 0; i < PCI_ROM_SLOT; i++) { + VFIOBAR *bar = &vdev->bars[i]; + vfio_bar_quirk_finalize(vdev, i); - vfio_region_finalize(&vdev->bars[i].region); + vfio_region_finalize(&bar->region); + if (bar->size) { + object_unparent(OBJECT(bar->mr)); + g_free(bar->mr); + } } if (vdev->vga) { @@ -2734,6 +2871,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) /* QEMU can choose to expose the ROM or not */ memset(vdev->emulated_config_bits + PCI_ROM_ADDRESS, 0xff, 4); + /* QEMU can also add or extend BARs */ + memset(vdev->emulated_config_bits + PCI_BASE_ADDRESS_0, 0xff, 6 * 4); /* * The PCI spec reserves vendor ID 0xffff as an invalid value. 
The @@ -2804,13 +2943,15 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) vfio_pci_size_rom(vdev); + vfio_bars_prepare(vdev); + vfio_msix_early_setup(vdev, &err); if (err) { error_propagate(errp, err); goto error; } - vfio_bars_setup(vdev); + vfio_bars_register(vdev); ret = vfio_add_capabilities(vdev, errp); if (ret) { @@ -2989,6 +3130,8 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_BOOL("x-no-kvm-intx", VFIOPCIDevice, no_kvm_intx, false), DEFINE_PROP_BOOL("x-no-kvm-msi", VFIOPCIDevice, no_kvm_msi, false), DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), + DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, + no_geforce_quirks, false), DEFINE_PROP_UINT32("x-pci-vendor-id", VFIOPCIDevice, vendor_id, PCI_ANY_ID), DEFINE_PROP_UINT32("x-pci-device-id", VFIOPCIDevice, device_id, PCI_ANY_ID), DEFINE_PROP_UINT32("x-pci-sub-vendor-id", VFIOPCIDevice, @@ -2999,6 +3142,8 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_UNSIGNED_NODEFAULT("x-nv-gpudirect-clique", VFIOPCIDevice, nv_gpudirect_clique, qdev_prop_nv_gpudirect_clique, uint8_t), + DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice, msix_relo, + OFF_AUTOPCIBAR_OFF), /* * TODO - support passed fds... is this necessary? 
* DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name), diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index a8fb3b3..f4aa13e 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -33,6 +33,9 @@ typedef struct VFIOQuirk { typedef struct VFIOBAR { VFIORegion region; + MemoryRegion *mr; + size_t size; + uint8_t type; bool ioport; bool mem64; QLIST_HEAD(, VFIOQuirk) quirks; @@ -86,7 +89,7 @@ enum { VFIO_INT_MSIX = 3, }; -/* Cache of MSI-X setup plus extra mmap and memory region for split BAR map */ +/* Cache of MSI-X setup */ typedef struct VFIOMSIXInfo { uint8_t table_bar; uint8_t pba_bar; @@ -132,6 +135,7 @@ typedef struct VFIOPCIDevice { (1 << VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT) int32_t bootindex; uint32_t igd_gms; + OffAutoPCIBAR msix_relo; uint8_t pm_cap; uint8_t nv_gpudirect_clique; bool pci_aer; @@ -142,6 +146,7 @@ typedef struct VFIOPCIDevice { bool no_kvm_intx; bool no_kvm_msi; bool no_kvm_msix; + bool no_geforce_quirks; } VFIOPCIDevice; uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len); diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c index da84abf..0d4bc0a 100644 --- a/hw/vfio/platform.c +++ b/hw/vfio/platform.c @@ -643,6 +643,8 @@ static void vfio_platform_realize(DeviceState *dev, Error **errp) vbasedev->dev = dev; vbasedev->ops = &vfio_platform_ops; + qemu_mutex_init(&vdev->intp_mutex); + trace_vfio_platform_realize(vbasedev->sysfsdev ? 
vbasedev->sysfsdev : vbasedev->name, vdev->compat); diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index fae096c..79f63a2 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -16,6 +16,8 @@ vfio_msix_pba_disable(const char *name) " (%s)" vfio_msix_pba_enable(const char *name) " (%s)" vfio_msix_disable(const char *name) " (%s)" vfio_msix_fixup(const char *name, int bar, uint64_t start, uint64_t end) " (%s) MSI-X region %d mmap fixup [0x%"PRIx64" - 0x%"PRIx64"]" +vfio_msix_relo_cost(const char *name, int bar, uint64_t cost) " (%s) BAR %d cost 0x%"PRIx64"" +vfio_msix_relo(const char *name, int bar, uint64_t offset) " (%s) BAR %d offset 0x%"PRIx64"" vfio_msi_enable(const char *name, int nr_vectors) " (%s) Enabled %d MSI vectors" vfio_msi_disable(const char *name) " (%s)" vfio_pci_load_rom(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s ROM:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx" @@ -123,3 +125,4 @@ vfio_prereg_register(uint64_t va, uint64_t size, int ret) "va=0x%"PRIx64" size=0 vfio_prereg_unregister(uint64_t va, uint64_t size, int ret) "va=0x%"PRIx64" size=0x%"PRIx64" ret=%d" vfio_spapr_create_window(int ps, uint64_t ws, uint64_t off) "pageshift=0x%x winsize=0x%"PRIx64" offset=0x%"PRIx64 vfio_spapr_remove_window(uint64_t off) "offset=0x%"PRIx64 +vfio_spapr_group_attach(int groupfd, int tablefd) "Attached groupfd %d to liobn fd %d" diff --git a/include/exec/memory.h b/include/exec/memory.h index 07c5d6d..3ef8399 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -190,6 +190,10 @@ struct MemoryRegionOps { const MemoryRegionMmio old_mmio; }; +enum IOMMUMemoryRegionAttr { + IOMMU_ATTR_SPAPR_TCE_FD +}; + typedef struct IOMMUMemoryRegionClass { /* private */ struct DeviceClass parent_class; @@ -210,6 +214,10 @@ typedef struct IOMMUMemoryRegionClass { IOMMUNotifierFlag new_flags); /* Set this up to provide customized IOMMU replay function */ void (*replay)(IOMMUMemoryRegion *iommu, 
IOMMUNotifier *notifier); + + /* Get IOMMU misc attributes */ + int (*get_attr)(IOMMUMemoryRegion *iommu, enum IOMMUMemoryRegionAttr, + void *data); } IOMMUMemoryRegionClass; typedef struct CoalescedMemoryRange CoalescedMemoryRange; @@ -927,6 +935,20 @@ void memory_region_unregister_iommu_notifier(MemoryRegion *mr, IOMMUNotifier *n); /** + * memory_region_iommu_get_attr: return an IOMMU attr if get_attr() is + * defined on the IOMMU. + * + * Returns 0 if succeded, error code otherwise. + * + * @iommu_mr: the memory region + * @attr: the requested attribute + * @data: a pointer to the requested attribute data + */ +int memory_region_iommu_get_attr(IOMMUMemoryRegion *iommu_mr, + enum IOMMUMemoryRegionAttr attr, + void *data); + +/** * memory_region_name: get a memory region's name * * Returns the string that was used to initialize the memory region. diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h index 5bbfec6..1d61a35 100644 --- a/include/hw/qdev-properties.h +++ b/include/hw/qdev-properties.h @@ -34,6 +34,7 @@ extern const PropertyInfo qdev_prop_pci_host_devaddr; extern const PropertyInfo qdev_prop_uuid; extern const PropertyInfo qdev_prop_arraylen; extern const PropertyInfo qdev_prop_link; +extern const PropertyInfo qdev_prop_off_auto_pcibar; #define DEFINE_PROP(_name, _state, _field, _prop, _type) { \ .name = (_name), \ @@ -214,6 +215,9 @@ extern const PropertyInfo qdev_prop_link; DEFINE_PROP(_n, _s, _f, qdev_prop_pci_host_devaddr, PCIHostDeviceAddress) #define DEFINE_PROP_MEMORY_REGION(_n, _s, _f) \ DEFINE_PROP(_n, _s, _f, qdev_prop_ptr, MemoryRegion *) +#define DEFINE_PROP_OFF_AUTO_PCIBAR(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_off_auto_pcibar, \ + OffAutoPCIBAR) #define DEFINE_PROP_UUID(_name, _state, _field) { \ .name = (_name), \ @@ -1922,6 +1922,19 @@ void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, } } +int memory_region_iommu_get_attr(IOMMUMemoryRegion *iommu_mr, + enum IOMMUMemoryRegionAttr 
attr, + void *data) +{ + IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr); + + if (!imrc->get_attr) { + return -EINVAL; + } + + return imrc->get_attr(iommu_mr, attr, data); +} + void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client) { uint8_t mask = 1 << client; diff --git a/qapi/common.json b/qapi/common.json index 6eb0182..d9b14dd 100644 --- a/qapi/common.json +++ b/qapi/common.json @@ -100,3 +100,29 @@ { 'alternate': 'StrOrNull', 'data': { 's': 'str', 'n': 'null' } } + +## +# @OffAutoPCIBAR: +# +# An enumeration of options for specifying a PCI BAR +# +# @off: The specified feature is disabled +# +# @auto: The PCI BAR for the feature is automatically selected +# +# @bar0: PCI BAR0 is used for the feature +# +# @bar1: PCI BAR1 is used for the feature +# +# @bar2: PCI BAR2 is used for the feature +# +# @bar3: PCI BAR3 is used for the feature +# +# @bar4: PCI BAR4 is used for the feature +# +# @bar5: PCI BAR5 is used for the feature +# +# Since: 2.12 +## +{ 'enum': 'OffAutoPCIBAR', + 'data': [ 'off', 'auto', 'bar0', 'bar1', 'bar2', 'bar3', 'bar4', 'bar5' ] } diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 84284d5..9842b3b 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -137,7 +137,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE); cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64); cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE); - cap_spapr_vfio = false; + cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO); cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG); cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR); cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR); @@ -2514,6 +2514,11 @@ int kvmppc_get_cap_safe_indirect_branch(void) return cap_ppc_safe_indirect_branch; } +bool kvmppc_has_cap_spapr_vfio(void) +{ + return cap_spapr_vfio; +} + PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) { uint32_t 
host_pvr = mfpvr(); diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h index 39830ba..4d2789e 100644 --- a/target/ppc/kvm_ppc.h +++ b/target/ppc/kvm_ppc.h @@ -46,6 +46,7 @@ void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift, int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size); int kvmppc_reset_htab(int shift_hint); uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift); +bool kvmppc_has_cap_spapr_vfio(void); #endif /* !CONFIG_USER_ONLY */ bool kvmppc_has_cap_epr(void); int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function); @@ -232,6 +233,11 @@ static inline bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path) return true; } +static inline bool kvmppc_has_cap_spapr_vfio(void) +{ + return false; +} + #endif /* !CONFIG_USER_ONLY */ static inline bool kvmppc_has_cap_epr(void) |