Diffstat (limited to 'hw/virtio/virtio-iommu.c')
-rw-r--r--  hw/virtio/virtio-iommu.c | 428
 1 file changed, 269 insertions(+), 159 deletions(-)
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index 1326c6e..59ef4fb 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -69,6 +69,11 @@ typedef struct VirtIOIOMMUMapping {
uint32_t flags;
} VirtIOIOMMUMapping;
+struct hiod_key {
+ PCIBus *bus;
+ uint8_t devfn;
+};
+
static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev)
{
return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn);
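For reference, the endpoint IDs used throughout this device are PCI BDFs:
the bus number occupies the high byte and devfn the low byte, which is all
PCI_BUILD_BDF packs together. A minimal standalone sketch of that packing,
using hypothetical bus/devfn values:

    #include <stdint.h>
    #include <stdio.h>

    /* bus in bits 15..8, devfn (device << 3 | function) in bits 7..0 */
    static uint16_t build_bdf(uint8_t bus, uint8_t devfn)
    {
        return (uint16_t)((bus << 8) | devfn);
    }

    int main(void)
    {
        uint8_t devfn = (2 << 3) | 0;  /* hypothetical: device 2, function 0 */
        printf("BDF = 0x%04x\n", build_bdf(0x01, devfn));  /* 0x0110 */
        return 0;
    }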
@@ -303,6 +308,7 @@ static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
if (!ep->domain) {
return;
}
+ trace_virtio_iommu_detach_endpoint_from_domain(domain->id, ep->id);
g_tree_foreach(domain->mappings, virtio_iommu_notify_unmap_cb,
ep->iommu_mr);
QLIST_REMOVE(ep, next);
@@ -462,8 +468,245 @@ static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque,
return &sdev->as;
}
+static gboolean hiod_equal(gconstpointer v1, gconstpointer v2)
+{
+ const struct hiod_key *key1 = v1;
+ const struct hiod_key *key2 = v2;
+
+ return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
+}
+
+static guint hiod_hash(gconstpointer v)
+{
+ const struct hiod_key *key = v;
+ guint value = (guint)(uintptr_t)key->bus;
+
+ return (guint)(value << 8 | key->devfn);
+}
+
+static void hiod_destroy(gpointer v)
+{
+ object_unref(v);
+}
+
+static HostIOMMUDevice *
+get_host_iommu_device(VirtIOIOMMU *viommu, PCIBus *bus, int devfn)
+{
+ struct hiod_key key = {
+ .bus = bus,
+ .devfn = devfn,
+ };
+
+ return g_hash_table_lookup(viommu->host_iommu_devices, &key);
+}
+
+/**
+ * rebuild_resv_regions: rebuild the reserved regions from both the
+ * host reserved ranges and the property-set reserved ranges
+ */
+static int rebuild_resv_regions(IOMMUDevice *sdev)
+{
+ GList *l;
+ int i = 0;
+
+ /* free the existing list and rebuild it from scratch */
+ g_list_free_full(sdev->resv_regions, g_free);
+ sdev->resv_regions = NULL;
+
+ /* First add host reserved regions if any, all tagged as RESERVED */
+ for (l = sdev->host_resv_ranges; l; l = l->next) {
+ ReservedRegion *reg = g_new0(ReservedRegion, 1);
+ Range *r = (Range *)l->data;
+
+ reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED;
+ range_set_bounds(&reg->range, range_lob(r), range_upb(r));
+ sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg);
+ trace_virtio_iommu_host_resv_regions(sdev->iommu_mr.parent_obj.name, i,
+ range_lob(&reg->range),
+ range_upb(&reg->range));
+ i++;
+ }
+ /*
+ * then add higher priority reserved regions set by the machine
+ * through properties
+ */
+ add_prop_resv_regions(sdev);
+ return 0;
+}
+
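rebuild_resv_regions() consumes sdev->host_resv_ranges, which
virtio_iommu_set_host_iova_ranges() below fills by inverting the usable
IOVA ranges over [0, UINT64_MAX]: everything the host IOMMU cannot map
becomes a RESERVED region. A simplified standalone illustration of that
inversion, with plain structs standing in for QEMU's Range and a
hypothetical usable window:

    #include <stdint.h>
    #include <stdio.h>

    typedef struct { uint64_t lob, upb; } SimpleRange;  /* inclusive bounds */

    /* Given usable ranges sorted by lower bound, emit their complement
     * over [low, up]; the complements become the reserved regions. */
    static int invert_ranges(const SimpleRange *in, int n,
                             uint64_t low, uint64_t up, SimpleRange *out)
    {
        int m = 0;
        uint64_t next = low;

        for (int i = 0; i < n; i++) {
            if (in[i].lob > next) {
                out[m++] = (SimpleRange){ next, in[i].lob - 1 };
            }
            next = in[i].upb + 1;
            if (!next) {          /* upb was UINT64_MAX: wrapped, done */
                return m;
            }
        }
        if (next <= up) {
            out[m++] = (SimpleRange){ next, up };
        }
        return m;
    }

    int main(void)
    {
        /* hypothetical usable window reported by the host IOMMU */
        SimpleRange usable[] = { { 0x1000, 0xfedfffff } };
        SimpleRange resv[2];
        int m = invert_ranges(usable, 1, 0, UINT64_MAX, resv);

        for (int i = 0; i < m; i++) {  /* [0, 0xfff] and [0xfee00000, ~0] */
            printf("reserved: 0x%llx..0x%llx\n",
                   (unsigned long long)resv[i].lob,
                   (unsigned long long)resv[i].upb);
        }
        return 0;
    }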
+static int virtio_iommu_set_host_iova_ranges(VirtIOIOMMU *s, PCIBus *bus,
+ int devfn, GList *iova_ranges,
+ Error **errp)
+{
+ IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);
+ IOMMUDevice *sdev;
+ int ret = -EINVAL;
+
+ if (!sbus) {
+ error_setg(errp, "%s: no IOMMUPciBus found!", __func__);
+ return ret;
+ }
+
+ sdev = sbus->pbdev[devfn];
+ if (!sdev) {
+ error_setg(errp, "%s: no IOMMUDevice found!", __func__);
+ return ret;
+ }
+
+ if (sdev->host_resv_ranges) {
+ error_setg(errp, "%s: virtio-iommu does not support aliased BDF",
+ __func__);
+ return ret;
+ }
+
+ range_inverse_array(iova_ranges,
+ &sdev->host_resv_ranges,
+ 0, UINT64_MAX);
+ rebuild_resv_regions(sdev);
+
+ return 0;
+}
+
+static void virtio_iommu_unset_host_iova_ranges(VirtIOIOMMU *s, PCIBus *bus,
+ int devfn)
+{
+ IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);
+ IOMMUDevice *sdev;
+
+ if (!sbus) {
+ return;
+ }
+
+ sdev = sbus->pbdev[devfn];
+ if (!sdev) {
+ return;
+ }
+
+ g_list_free_full(g_steal_pointer(&sdev->host_resv_ranges), g_free);
+ g_list_free_full(sdev->resv_regions, g_free);
+ sdev->host_resv_ranges = NULL;
+ sdev->resv_regions = NULL;
+ add_prop_resv_regions(sdev);
+}
+
+static bool check_page_size_mask(VirtIOIOMMU *viommu, uint64_t new_mask,
+ Error **errp)
+{
+ uint64_t cur_mask = viommu->config.page_size_mask;
+
+ if ((cur_mask & new_mask) == 0) {
+ error_setg(errp, "virtio-iommu reports a page size mask 0x%"PRIx64
+ " incompatible with currently supported mask 0x%"PRIx64,
+ new_mask, cur_mask);
+ return false;
+ }
+ /*
+ * Once the granule is frozen we can't change the mask anymore. If by
+ * chance the hotplugged device supports the same granule, we can still
+ * accept it.
+ */
+ if (viommu->granule_frozen) {
+ int cur_granule = ctz64(cur_mask);
+
+ if (!(BIT_ULL(cur_granule) & new_mask)) {
+ error_setg(errp,
+ "virtio-iommu does not support frozen granule 0x%llx",
+ BIT_ULL(cur_granule));
+ return false;
+ }
+ }
+ return true;
+}
+
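The granule logic above is pure bit arithmetic: the granule is the lowest
set bit of the current mask, and after freezing, a hotplugged device stays
acceptable only if its own mask still contains that bit. A standalone
sketch with hypothetical masks (using the GCC/Clang ctz builtin in place
of QEMU's ctz64):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define BIT_ULL(n) (1ULL << (n))

    int main(void)
    {
        uint64_t cur_mask = ~(uint64_t)(4096 - 1);      /* -(4 KiB): 4k granule */
        uint64_t new_mask = BIT_ULL(12) | BIT_ULL(21);  /* device: 4k and 2M */
        int cur_granule = __builtin_ctzll(cur_mask);    /* 12 */

        bool overlap   = (cur_mask & new_mask) != 0;        /* pre-freeze check */
        bool frozen_ok = (BIT_ULL(cur_granule) & new_mask) != 0;

        printf("granule 0x%llx, overlap %d, frozen-ok %d\n",
               (unsigned long long)BIT_ULL(cur_granule), overlap, frozen_ok);
        return 0;
    }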
+static bool virtio_iommu_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
+ HostIOMMUDevice *hiod, Error **errp)
+{
+ ERRP_GUARD();
+ VirtIOIOMMU *viommu = opaque;
+ HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod);
+ struct hiod_key *new_key;
+ GList *host_iova_ranges = NULL;
+
+ assert(hiod);
+
+ if (get_host_iommu_device(viommu, bus, devfn)) {
+ error_setg(errp, "Host IOMMU device already exists");
+ return false;
+ }
+
+ if (hiodc->get_iova_ranges) {
+ int ret;
+ host_iova_ranges = hiodc->get_iova_ranges(hiod);
+ if (!host_iova_ranges) {
+ return true; /* some old kernels may not support that capability */
+ }
+ ret = virtio_iommu_set_host_iova_ranges(viommu, hiod->aliased_bus,
+ hiod->aliased_devfn,
+ host_iova_ranges, errp);
+ if (ret) {
+ goto error;
+ }
+ }
+ if (hiodc->get_page_size_mask) {
+ uint64_t new_mask = hiodc->get_page_size_mask(hiod);
+
+ if (check_page_size_mask(viommu, new_mask, errp)) {
+ /*
+ * The default mask depends on the "granule" property. For example,
+ * with 4k granule, it is -(4 * KiB). When an assigned device has
+ * page size restrictions due to the hardware IOMMU configuration,
+ * apply this restriction to the mask.
+ */
+ trace_virtio_iommu_update_page_size_mask(hiod->name,
+ viommu->config.page_size_mask,
+ new_mask);
+ if (!viommu->granule_frozen) {
+ viommu->config.page_size_mask &= new_mask;
+ }
+ } else {
+ error_prepend(errp, "%s: ", hiod->name);
+ goto error;
+ }
+ }
+
+ new_key = g_malloc(sizeof(*new_key));
+ new_key->bus = bus;
+ new_key->devfn = devfn;
+
+ object_ref(hiod);
+ g_hash_table_insert(viommu->host_iommu_devices, new_key, hiod);
+ g_list_free_full(host_iova_ranges, g_free);
+
+ return true;
+error:
+ g_list_free_full(host_iova_ranges, g_free);
+ return false;
+}
+
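Note the ownership pattern above: object_ref() before g_hash_table_insert()
pairs with the hiod_destroy() value-destroy callback, so
g_hash_table_remove() in the unset path drops the table's reference
automatically. A minimal GLib sketch of that pattern, with a hypothetical
refcounted type standing in for HostIOMMUDevice:

    #include <glib.h>

    typedef struct { int refs; } Obj;  /* stand-in for a refcounted object */

    static Obj *obj_ref(Obj *o) { o->refs++; return o; }
    static void obj_unref(gpointer v)
    {
        Obj *o = v;
        if (--o->refs == 0) {
            g_free(o);
        }
    }

    int main(void)
    {
        /* the value-destroy callback releases the table's reference */
        GHashTable *t = g_hash_table_new_full(g_direct_hash, g_direct_equal,
                                              NULL, obj_unref);
        Obj *o = g_new0(Obj, 1);
        o->refs = 1;                                   /* caller's reference */

        g_hash_table_insert(t, GINT_TO_POINTER(1), obj_ref(o)); /* table's ref */
        obj_unref(o);                                  /* caller is done */

        g_hash_table_remove(t, GINT_TO_POINTER(1));    /* refs hit 0, freed */
        g_hash_table_destroy(t);
        return 0;
    }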
+static void
+virtio_iommu_unset_iommu_device(PCIBus *bus, void *opaque, int devfn)
+{
+ VirtIOIOMMU *viommu = opaque;
+ HostIOMMUDevice *hiod;
+ struct hiod_key key = {
+ .bus = bus,
+ .devfn = devfn,
+ };
+
+ hiod = g_hash_table_lookup(viommu->host_iommu_devices, &key);
+ if (!hiod) {
+ return;
+ }
+ virtio_iommu_unset_host_iova_ranges(viommu, hiod->aliased_bus,
+ hiod->aliased_devfn);
+
+ g_hash_table_remove(viommu->host_iommu_devices, &key);
+}
+
static const PCIIOMMUOps virtio_iommu_ops = {
.get_address_space = virtio_iommu_find_add_as,
+ .set_iommu_device = virtio_iommu_set_iommu_device,
+ .unset_iommu_device = virtio_iommu_unset_iommu_device,
};
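With both callbacks populated, the PCI layer can dispatch a passthrough
device's HostIOMMUDevice to the vIOMMU through this ops table. A
self-contained sketch of the dispatch pattern only, with reduced
hypothetical types rather than the QEMU definitions:

    #include <stdbool.h>
    #include <stdio.h>

    typedef struct HostIOMMUDevice HostIOMMUDevice;

    typedef struct {
        bool (*set_iommu_device)(void *opaque, int devfn,
                                 HostIOMMUDevice *hiod);
        void (*unset_iommu_device)(void *opaque, int devfn);
    } IOMMUOps;

    static bool viommu_set(void *opaque, int devfn, HostIOMMUDevice *hiod)
    {
        printf("attach host IOMMU device at devfn 0x%x\n", devfn);
        return true;
    }

    static void viommu_unset(void *opaque, int devfn)
    {
        printf("detach host IOMMU device at devfn 0x%x\n", devfn);
    }

    static const IOMMUOps ops = {
        .set_iommu_device = viommu_set,
        .unset_iommu_device = viommu_unset,
    };

    int main(void)
    {
        /* a passthrough device's realize/unrealize would drive these */
        if (ops.set_iommu_device(NULL, 0x10, NULL)) {
            ops.unset_iommu_device(NULL, 0x10);
        }
        return 0;
    }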
static int virtio_iommu_attach(VirtIOIOMMU *s,
@@ -544,6 +787,7 @@ static int virtio_iommu_detach(VirtIOIOMMU *s,
if (QLIST_EMPTY(&domain->endpoint_list)) {
g_tree_remove(s->domains, GUINT_TO_POINTER(domain->id));
}
+ g_tree_remove(s->endpoints, GUINT_TO_POINTER(ep_id));
return VIRTIO_IOMMU_S_OK;
}
@@ -706,7 +950,6 @@ static int virtio_iommu_probe(VirtIOIOMMU *s,
}
buf += count;
free -= count;
- sdev->probe_done = true;
return VIRTIO_IOMMU_S_OK;
}
@@ -782,6 +1025,9 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
iov = elem->out_sg;
sz = iov_to_buf(iov, iov_cnt, 0, &head, sizeof(head));
if (unlikely(sz != sizeof(head))) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: read %zu bytes from command head"
+ "but expected %zu\n", __func__, sz, sizeof(head));
tail.status = VIRTIO_IOMMU_S_DEVERR;
goto out;
}
@@ -818,6 +1064,25 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
out:
sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
buf ? buf : &tail, output_size);
+ if (unlikely(sz != output_size)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: wrote %zu bytes to command response"
+ "but response size is %zu\n",
+ __func__, sz, output_size);
+ tail.status = VIRTIO_IOMMU_S_DEVERR;
+ /*
+ * We checked at the beginning of the loop that sizeof(tail)
+ * fits into elem->in_sg
+ */
+ output_size = sizeof(tail);
+ g_free(buf);
+ buf = NULL;
+ sz = iov_from_buf(elem->in_sg,
+ elem->in_num,
+ 0,
+ &tail,
+ output_size);
+ }
assert(sz == output_size);
virtqueue_push(vq, elem, sz);
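The recovery path above is a two-pass write: first attempt the full
response, and if the guest's in_sg turns out too small, fall back to
writing only the fixed-size tail carrying VIRTIO_IOMMU_S_DEVERR, which the
loop already guaranteed fits. A simplified sketch of that fallback, with a
plain buffer standing in for the scatter list:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    typedef struct { uint8_t status; } Tail;

    /* returns how many bytes actually fit, like iov_from_buf() */
    static size_t buf_write(uint8_t *dst, size_t dst_len,
                            const void *src, size_t len)
    {
        size_t n = len < dst_len ? len : dst_len;
        memcpy(dst, src, n);
        return n;
    }

    int main(void)
    {
        uint8_t guest_buf[1];          /* guest supplied a tiny buffer */
        uint8_t reply[64] = { 0 };     /* full (e.g. PROBE) response */
        Tail tail = { .status = 0 };
        size_t output_size = sizeof(reply);

        size_t sz = buf_write(guest_buf, sizeof(guest_buf), reply, output_size);
        if (sz != output_size) {       /* short write: report DEVERR only */
            tail.status = 0x03;        /* VIRTIO_IOMMU_S_DEVERR */
            output_size = sizeof(tail);
            sz = buf_write(guest_buf, sizeof(guest_buf), &tail, output_size);
        }
        printf("wrote %zu of %zu bytes\n", sz, output_size);
        return 0;
    }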
@@ -1115,150 +1380,6 @@ static int virtio_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu_mr,
return 0;
}
-/*
- * The default mask depends on the "granule" property. For example, with
- * 4k granule, it is -(4 * KiB). When an assigned device has page size
- * restrictions due to the hardware IOMMU configuration, apply this restriction
- * to the mask.
- */
-static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr,
- uint64_t new_mask,
- Error **errp)
-{
- IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
- VirtIOIOMMU *s = sdev->viommu;
- uint64_t cur_mask = s->config.page_size_mask;
-
- trace_virtio_iommu_set_page_size_mask(mr->parent_obj.name, cur_mask,
- new_mask);
-
- if ((cur_mask & new_mask) == 0) {
- error_setg(errp, "virtio-iommu %s reports a page size mask 0x%"PRIx64
- " incompatible with currently supported mask 0x%"PRIx64,
- mr->parent_obj.name, new_mask, cur_mask);
- return -1;
- }
-
- /*
- * Once the granule is frozen we can't change the mask anymore. If by
- * chance the hotplugged device supports the same granule, we can still
- * accept it.
- */
- if (s->granule_frozen) {
- int cur_granule = ctz64(cur_mask);
-
- if (!(BIT_ULL(cur_granule) & new_mask)) {
- error_setg(errp, "virtio-iommu %s does not support frozen granule 0x%llx",
- mr->parent_obj.name, BIT_ULL(cur_granule));
- return -1;
- }
- return 0;
- }
-
- s->config.page_size_mask &= new_mask;
- return 0;
-}
-
-/**
- * rebuild_resv_regions: rebuild resv regions with both the
- * info of host resv ranges and property set resv ranges
- */
-static int rebuild_resv_regions(IOMMUDevice *sdev)
-{
- GList *l;
- int i = 0;
-
- /* free the existing list and rebuild it from scratch */
- g_list_free_full(sdev->resv_regions, g_free);
- sdev->resv_regions = NULL;
-
- /* First add host reserved regions if any, all tagged as RESERVED */
- for (l = sdev->host_resv_ranges; l; l = l->next) {
- ReservedRegion *reg = g_new0(ReservedRegion, 1);
- Range *r = (Range *)l->data;
-
- reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED;
- range_set_bounds(&reg->range, range_lob(r), range_upb(r));
- sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg);
- trace_virtio_iommu_host_resv_regions(sdev->iommu_mr.parent_obj.name, i,
- range_lob(&reg->range),
- range_upb(&reg->range));
- i++;
- }
- /*
- * then add higher priority reserved regions set by the machine
- * through properties
- */
- add_prop_resv_regions(sdev);
- return 0;
-}
-
-/**
- * virtio_iommu_set_iova_ranges: Conveys the usable IOVA ranges
- *
- * The function turns those into reserved ranges. Once some
- * reserved ranges have been set, new reserved regions cannot be
- * added outside of the original ones.
- *
- * @mr: IOMMU MR
- * @iova_ranges: list of usable IOVA ranges
- * @errp: error handle
- */
-static int virtio_iommu_set_iova_ranges(IOMMUMemoryRegion *mr,
- GList *iova_ranges,
- Error **errp)
-{
- IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
- GList *current_ranges = sdev->host_resv_ranges;
- GList *l, *tmp, *new_ranges = NULL;
- int ret = -EINVAL;
-
- /* check that each new resv region is included in an existing one */
- if (sdev->host_resv_ranges) {
- range_inverse_array(iova_ranges,
- &new_ranges,
- 0, UINT64_MAX);
-
- for (tmp = new_ranges; tmp; tmp = tmp->next) {
- Range *newr = (Range *)tmp->data;
- bool included = false;
-
- for (l = current_ranges; l; l = l->next) {
- Range * r = (Range *)l->data;
-
- if (range_contains_range(r, newr)) {
- included = true;
- break;
- }
- }
- if (!included) {
- goto error;
- }
- }
- /* all new reserved ranges are included in existing ones */
- ret = 0;
- goto out;
- }
-
- if (sdev->probe_done) {
- warn_report("%s: Notified about new host reserved regions after probe",
- mr->parent_obj.name);
- }
-
- range_inverse_array(iova_ranges,
- &sdev->host_resv_ranges,
- 0, UINT64_MAX);
- rebuild_resv_regions(sdev);
-
- return 0;
-error:
- error_setg(errp, "IOMMU mr=%s Conflicting host reserved ranges set!",
- mr->parent_obj.name);
-out:
- g_list_free_full(new_ranges, g_free);
- return ret;
-}
-
static void virtio_iommu_system_reset(void *opaque)
{
VirtIOIOMMU *s = opaque;
@@ -1281,18 +1402,6 @@ static void virtio_iommu_freeze_granule(Notifier *notifier, void *data)
VirtIOIOMMU *s = container_of(notifier, VirtIOIOMMU, machine_done);
int granule;
- if (likely(s->config.bypass)) {
- /*
- * Transient IOMMU MR enable to collect page_size_mask requirements
- * through memory_region_iommu_set_page_size_mask() called by
- * VFIO region_add() callback
- */
- s->config.bypass = false;
- virtio_iommu_switch_address_space_all(s);
- /* restore default */
- s->config.bypass = true;
- virtio_iommu_switch_address_space_all(s);
- }
s->granule_frozen = true;
granule = ctz64(s->config.page_size_mask);
trace_virtio_iommu_freeze_granule(BIT_ULL(granule));
@@ -1357,6 +1466,9 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free);
+ s->host_iommu_devices = g_hash_table_new_full(hiod_hash, hiod_equal,
+ g_free, hiod_destroy);
+
if (s->primary_bus) {
pci_setup_iommu(s->primary_bus, &virtio_iommu_ops, s);
} else {
@@ -1580,8 +1692,6 @@ static void virtio_iommu_memory_region_class_init(ObjectClass *klass,
imrc->translate = virtio_iommu_translate;
imrc->replay = virtio_iommu_replay;
imrc->notify_flag_changed = virtio_iommu_notify_flag_changed;
- imrc->iommu_set_page_size_mask = virtio_iommu_set_page_size_mask;
- imrc->iommu_set_iova_ranges = virtio_iommu_set_iova_ranges;
}
static const TypeInfo virtio_iommu_info = {