diff options
Diffstat (limited to 'hw/virtio/virtio-iommu.c')
-rw-r--r-- | hw/virtio/virtio-iommu.c | 428 |
1 files changed, 269 insertions, 159 deletions
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index 1326c6e..59ef4fb 100644 --- a/hw/virtio/virtio-iommu.c +++ b/hw/virtio/virtio-iommu.c @@ -69,6 +69,11 @@ typedef struct VirtIOIOMMUMapping { uint32_t flags; } VirtIOIOMMUMapping; +struct hiod_key { + PCIBus *bus; + uint8_t devfn; +}; + static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev) { return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn); @@ -303,6 +308,7 @@ static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep) if (!ep->domain) { return; } + trace_virtio_iommu_detach_endpoint_from_domain(domain->id, ep->id); g_tree_foreach(domain->mappings, virtio_iommu_notify_unmap_cb, ep->iommu_mr); QLIST_REMOVE(ep, next); @@ -462,8 +468,245 @@ static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque, return &sdev->as; } +static gboolean hiod_equal(gconstpointer v1, gconstpointer v2) +{ + const struct hiod_key *key1 = v1; + const struct hiod_key *key2 = v2; + + return (key1->bus == key2->bus) && (key1->devfn == key2->devfn); +} + +static guint hiod_hash(gconstpointer v) +{ + const struct hiod_key *key = v; + guint value = (guint)(uintptr_t)key->bus; + + return (guint)(value << 8 | key->devfn); +} + +static void hiod_destroy(gpointer v) +{ + object_unref(v); +} + +static HostIOMMUDevice * +get_host_iommu_device(VirtIOIOMMU *viommu, PCIBus *bus, int devfn) { + struct hiod_key key = { + .bus = bus, + .devfn = devfn, + }; + + return g_hash_table_lookup(viommu->host_iommu_devices, &key); +} + +/** + * rebuild_resv_regions: rebuild resv regions with both the + * info of host resv ranges and property set resv ranges + */ +static int rebuild_resv_regions(IOMMUDevice *sdev) +{ + GList *l; + int i = 0; + + /* free the existing list and rebuild it from scratch */ + g_list_free_full(sdev->resv_regions, g_free); + sdev->resv_regions = NULL; + + /* First add host reserved regions if any, all tagged as RESERVED */ + for (l = sdev->host_resv_ranges; l; l = l->next) { + ReservedRegion *reg = g_new0(ReservedRegion, 1); + Range *r = (Range *)l->data; + + reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED; + range_set_bounds(®->range, range_lob(r), range_upb(r)); + sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg); + trace_virtio_iommu_host_resv_regions(sdev->iommu_mr.parent_obj.name, i, + range_lob(®->range), + range_upb(®->range)); + i++; + } + /* + * then add higher priority reserved regions set by the machine + * through properties + */ + add_prop_resv_regions(sdev); + return 0; +} + +static int virtio_iommu_set_host_iova_ranges(VirtIOIOMMU *s, PCIBus *bus, + int devfn, GList *iova_ranges, + Error **errp) +{ + IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus); + IOMMUDevice *sdev; + int ret = -EINVAL; + + if (!sbus) { + error_setg(errp, "%s: no IOMMUPciBus found!", __func__); + return ret; + } + + sdev = sbus->pbdev[devfn]; + if (!sdev) { + error_setg(errp, "%s: no IOMMUDevice found!", __func__); + return ret; + } + + if (sdev->host_resv_ranges) { + error_setg(errp, "%s virtio-iommu does not support aliased BDF", + __func__); + return ret; + } + + range_inverse_array(iova_ranges, + &sdev->host_resv_ranges, + 0, UINT64_MAX); + rebuild_resv_regions(sdev); + + return 0; +} + +static void virtio_iommu_unset_host_iova_ranges(VirtIOIOMMU *s, PCIBus *bus, + int devfn) +{ + IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus); + IOMMUDevice *sdev; + + if (!sbus) { + return; + } + + sdev = sbus->pbdev[devfn]; + if (!sdev) { + return; + } + + g_list_free_full(g_steal_pointer(&sdev->host_resv_ranges), g_free); + g_list_free_full(sdev->resv_regions, g_free); + sdev->host_resv_ranges = NULL; + sdev->resv_regions = NULL; + add_prop_resv_regions(sdev); +} + + +static bool check_page_size_mask(VirtIOIOMMU *viommu, uint64_t new_mask, + Error **errp) +{ + uint64_t cur_mask = viommu->config.page_size_mask; + + if ((cur_mask & new_mask) == 0) { + error_setg(errp, "virtio-iommu reports a page size mask 0x%"PRIx64 + " incompatible with currently supported mask 0x%"PRIx64, + new_mask, cur_mask); + return false; + } + /* + * Once the granule is frozen we can't change the mask anymore. If by + * chance the hotplugged device supports the same granule, we can still + * accept it. + */ + if (viommu->granule_frozen) { + int cur_granule = ctz64(cur_mask); + + if (!(BIT_ULL(cur_granule) & new_mask)) { + error_setg(errp, + "virtio-iommu does not support frozen granule 0x%llx", + BIT_ULL(cur_granule)); + return false; + } + } + return true; +} + +static bool virtio_iommu_set_iommu_device(PCIBus *bus, void *opaque, int devfn, + HostIOMMUDevice *hiod, Error **errp) +{ + ERRP_GUARD(); + VirtIOIOMMU *viommu = opaque; + HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod); + struct hiod_key *new_key; + GList *host_iova_ranges = NULL; + + assert(hiod); + + if (get_host_iommu_device(viommu, bus, devfn)) { + error_setg(errp, "Host IOMMU device already exists"); + return false; + } + + if (hiodc->get_iova_ranges) { + int ret; + host_iova_ranges = hiodc->get_iova_ranges(hiod); + if (!host_iova_ranges) { + return true; /* some old kernels may not support that capability */ + } + ret = virtio_iommu_set_host_iova_ranges(viommu, hiod->aliased_bus, + hiod->aliased_devfn, + host_iova_ranges, errp); + if (ret) { + goto error; + } + } + if (hiodc->get_page_size_mask) { + uint64_t new_mask = hiodc->get_page_size_mask(hiod); + + if (check_page_size_mask(viommu, new_mask, errp)) { + /* + * The default mask depends on the "granule" property. For example, + * with 4k granule, it is -(4 * KiB). When an assigned device has + * page size restrictions due to the hardware IOMMU configuration, + * apply this restriction to the mask. + */ + trace_virtio_iommu_update_page_size_mask(hiod->name, + viommu->config.page_size_mask, + new_mask); + if (!viommu->granule_frozen) { + viommu->config.page_size_mask &= new_mask; + } + } else { + error_prepend(errp, "%s: ", hiod->name); + goto error; + } + } + + new_key = g_malloc(sizeof(*new_key)); + new_key->bus = bus; + new_key->devfn = devfn; + + object_ref(hiod); + g_hash_table_insert(viommu->host_iommu_devices, new_key, hiod); + g_list_free_full(host_iova_ranges, g_free); + + return true; +error: + g_list_free_full(host_iova_ranges, g_free); + return false; +} + +static void +virtio_iommu_unset_iommu_device(PCIBus *bus, void *opaque, int devfn) +{ + VirtIOIOMMU *viommu = opaque; + HostIOMMUDevice *hiod; + struct hiod_key key = { + .bus = bus, + .devfn = devfn, + }; + + hiod = g_hash_table_lookup(viommu->host_iommu_devices, &key); + if (!hiod) { + return; + } + virtio_iommu_unset_host_iova_ranges(viommu, hiod->aliased_bus, + hiod->aliased_devfn); + + g_hash_table_remove(viommu->host_iommu_devices, &key); +} + static const PCIIOMMUOps virtio_iommu_ops = { .get_address_space = virtio_iommu_find_add_as, + .set_iommu_device = virtio_iommu_set_iommu_device, + .unset_iommu_device = virtio_iommu_unset_iommu_device, }; static int virtio_iommu_attach(VirtIOIOMMU *s, @@ -544,6 +787,7 @@ static int virtio_iommu_detach(VirtIOIOMMU *s, if (QLIST_EMPTY(&domain->endpoint_list)) { g_tree_remove(s->domains, GUINT_TO_POINTER(domain->id)); } + g_tree_remove(s->endpoints, GUINT_TO_POINTER(ep_id)); return VIRTIO_IOMMU_S_OK; } @@ -706,7 +950,6 @@ static int virtio_iommu_probe(VirtIOIOMMU *s, } buf += count; free -= count; - sdev->probe_done = true; return VIRTIO_IOMMU_S_OK; } @@ -782,6 +1025,9 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq) iov = elem->out_sg; sz = iov_to_buf(iov, iov_cnt, 0, &head, sizeof(head)); if (unlikely(sz != sizeof(head))) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: read %zu bytes from command head" + "but expected %zu\n", __func__, sz, sizeof(head)); tail.status = VIRTIO_IOMMU_S_DEVERR; goto out; } @@ -818,6 +1064,25 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq) out: sz = iov_from_buf(elem->in_sg, elem->in_num, 0, buf ? buf : &tail, output_size); + if (unlikely(sz != output_size)) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: wrote %zu bytes to command response" + "but response size is %zu\n", + __func__, sz, output_size); + tail.status = VIRTIO_IOMMU_S_DEVERR; + /* + * We checked that sizeof(tail) can fit to elem->in_sg at the + * beginning of the loop + */ + output_size = sizeof(tail); + g_free(buf); + buf = NULL; + sz = iov_from_buf(elem->in_sg, + elem->in_num, + 0, + &tail, + output_size); + } assert(sz == output_size); virtqueue_push(vq, elem, sz); @@ -1115,150 +1380,6 @@ static int virtio_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu_mr, return 0; } -/* - * The default mask depends on the "granule" property. For example, with - * 4k granule, it is -(4 * KiB). When an assigned device has page size - * restrictions due to the hardware IOMMU configuration, apply this restriction - * to the mask. - */ -static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, - uint64_t new_mask, - Error **errp) -{ - IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr); - VirtIOIOMMU *s = sdev->viommu; - uint64_t cur_mask = s->config.page_size_mask; - - trace_virtio_iommu_set_page_size_mask(mr->parent_obj.name, cur_mask, - new_mask); - - if ((cur_mask & new_mask) == 0) { - error_setg(errp, "virtio-iommu %s reports a page size mask 0x%"PRIx64 - " incompatible with currently supported mask 0x%"PRIx64, - mr->parent_obj.name, new_mask, cur_mask); - return -1; - } - - /* - * Once the granule is frozen we can't change the mask anymore. If by - * chance the hotplugged device supports the same granule, we can still - * accept it. - */ - if (s->granule_frozen) { - int cur_granule = ctz64(cur_mask); - - if (!(BIT_ULL(cur_granule) & new_mask)) { - error_setg(errp, "virtio-iommu %s does not support frozen granule 0x%llx", - mr->parent_obj.name, BIT_ULL(cur_granule)); - return -1; - } - return 0; - } - - s->config.page_size_mask &= new_mask; - return 0; -} - -/** - * rebuild_resv_regions: rebuild resv regions with both the - * info of host resv ranges and property set resv ranges - */ -static int rebuild_resv_regions(IOMMUDevice *sdev) -{ - GList *l; - int i = 0; - - /* free the existing list and rebuild it from scratch */ - g_list_free_full(sdev->resv_regions, g_free); - sdev->resv_regions = NULL; - - /* First add host reserved regions if any, all tagged as RESERVED */ - for (l = sdev->host_resv_ranges; l; l = l->next) { - ReservedRegion *reg = g_new0(ReservedRegion, 1); - Range *r = (Range *)l->data; - - reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED; - range_set_bounds(®->range, range_lob(r), range_upb(r)); - sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg); - trace_virtio_iommu_host_resv_regions(sdev->iommu_mr.parent_obj.name, i, - range_lob(®->range), - range_upb(®->range)); - i++; - } - /* - * then add higher priority reserved regions set by the machine - * through properties - */ - add_prop_resv_regions(sdev); - return 0; -} - -/** - * virtio_iommu_set_iova_ranges: Conveys the usable IOVA ranges - * - * The function turns those into reserved ranges. Once some - * reserved ranges have been set, new reserved regions cannot be - * added outside of the original ones. - * - * @mr: IOMMU MR - * @iova_ranges: list of usable IOVA ranges - * @errp: error handle - */ -static int virtio_iommu_set_iova_ranges(IOMMUMemoryRegion *mr, - GList *iova_ranges, - Error **errp) -{ - IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr); - GList *current_ranges = sdev->host_resv_ranges; - GList *l, *tmp, *new_ranges = NULL; - int ret = -EINVAL; - - /* check that each new resv region is included in an existing one */ - if (sdev->host_resv_ranges) { - range_inverse_array(iova_ranges, - &new_ranges, - 0, UINT64_MAX); - - for (tmp = new_ranges; tmp; tmp = tmp->next) { - Range *newr = (Range *)tmp->data; - bool included = false; - - for (l = current_ranges; l; l = l->next) { - Range * r = (Range *)l->data; - - if (range_contains_range(r, newr)) { - included = true; - break; - } - } - if (!included) { - goto error; - } - } - /* all new reserved ranges are included in existing ones */ - ret = 0; - goto out; - } - - if (sdev->probe_done) { - warn_report("%s: Notified about new host reserved regions after probe", - mr->parent_obj.name); - } - - range_inverse_array(iova_ranges, - &sdev->host_resv_ranges, - 0, UINT64_MAX); - rebuild_resv_regions(sdev); - - return 0; -error: - error_setg(errp, "IOMMU mr=%s Conflicting host reserved ranges set!", - mr->parent_obj.name); -out: - g_list_free_full(new_ranges, g_free); - return ret; -} - static void virtio_iommu_system_reset(void *opaque) { VirtIOIOMMU *s = opaque; @@ -1281,18 +1402,6 @@ static void virtio_iommu_freeze_granule(Notifier *notifier, void *data) VirtIOIOMMU *s = container_of(notifier, VirtIOIOMMU, machine_done); int granule; - if (likely(s->config.bypass)) { - /* - * Transient IOMMU MR enable to collect page_size_mask requirements - * through memory_region_iommu_set_page_size_mask() called by - * VFIO region_add() callback - */ - s->config.bypass = false; - virtio_iommu_switch_address_space_all(s); - /* restore default */ - s->config.bypass = true; - virtio_iommu_switch_address_space_all(s); - } s->granule_frozen = true; granule = ctz64(s->config.page_size_mask); trace_virtio_iommu_freeze_granule(BIT_ULL(granule)); @@ -1357,6 +1466,9 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free); + s->host_iommu_devices = g_hash_table_new_full(hiod_hash, hiod_equal, + g_free, hiod_destroy); + if (s->primary_bus) { pci_setup_iommu(s->primary_bus, &virtio_iommu_ops, s); } else { @@ -1580,8 +1692,6 @@ static void virtio_iommu_memory_region_class_init(ObjectClass *klass, imrc->translate = virtio_iommu_translate; imrc->replay = virtio_iommu_replay; imrc->notify_flag_changed = virtio_iommu_notify_flag_changed; - imrc->iommu_set_page_size_mask = virtio_iommu_set_page_size_mask; - imrc->iommu_set_iova_ranges = virtio_iommu_set_iova_ranges; } static const TypeInfo virtio_iommu_info = { |