aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Henderson <richard.henderson@linaro.org>2024-07-24 12:58:46 +1000
committerRichard Henderson <richard.henderson@linaro.org>2024-07-24 12:58:46 +1000
commitdd4bc5f1cfe92531d654c71e7703b3895afb05df (patch)
tree791927709bdc48597c9fdf51c3741cdf7b6dbda4
parent43f59bf76535b8842a762f8625b96091611aac11 (diff)
parent30b9167785177ac43d11b881fe321918124aeb88 (diff)
downloadqemu-dd4bc5f1cfe92531d654c71e7703b3895afb05df.zip
qemu-dd4bc5f1cfe92531d654c71e7703b3895afb05df.tar.gz
qemu-dd4bc5f1cfe92531d654c71e7703b3895afb05df.tar.bz2
Merge tag 'pull-vfio-20240723-1' of https://github.com/legoater/qemu into staging
vfio queue: * IOMMUFD Dirty Tracking support * Fix for a possible SEGV in IOMMU type1 container * Dropped initialization of host IOMMU device with mdev devices # -----BEGIN PGP SIGNATURE----- # # iQIzBAABCAAdFiEEoPZlSPBIlev+awtgUaNDx8/77KEFAmafyVUACgkQUaNDx8/7 # 7KGebRAAzEYxvstDxSPNF+1xx937TKbRpiKYtspTfEgu4Ht50MwO2ZqnVWzTBSwa # qcjhDf2avMBpBvkp4O9fR7nXR0HRN2KvYrBSThZ3Qpqu4KjxCAGcHI5uYmgfizYh # BBLrw3eWME5Ry220TinQF5KFl50vGq7Z/mku5N5Tgj2qfTfCXYK1Kc19SyAga49n # LSokTIjZAGJa4vxrE7THawaEUjFRjfCJey64JUs/TPJaGr4R1snJcWgETww6juUE # 9OSw/xl0AoQhaN/ZTRC1qCsBLUI2MVPsC+x+vqVK62HlTjCx+uDRVQ8KzfDzjCeH # gaLkMjxJSuJZMpm4UU7DBzDGEGcEBCGeNyFt37BSqqPPpX55CcFhj++d8vqTiwpF # YzmTNd/znxcZTw6OJN9sQZohh+NeS86CVZ3x31HD3dXifhRf17jbh7NoIyi+0ZCb # N+mytOH5BXsD+ddwbk+yMaxXV43Fgz7ThG5tB1tjhhNtLZHDA5ezFvGZ5F/FJrqE # xAbjOhz5MC+RcOVNSzQJCULNqFpfE6Gqeys6btEDm/ltf4LpAe6W1HYuv8BJc19T # UsqGK2yKAuQX8GErYxJ1zqZCttVrgpsmXFYTC5iGbxC84mvsF0Iti96IdXz9gfzN # Vlb2OxoefcOwVqIhbkvTZW0ZwYGGDDPAYhLMfr5lSuRqj123OOo= # =cViP # -----END PGP SIGNATURE----- # gpg: Signature made Wed 24 Jul 2024 01:16:37 AM AEST # gpg: using RSA key A0F66548F04895EBFE6B0B6051A343C7CFFBECA1 # gpg: Good signature from "Cédric Le Goater <clg@kaod.org>" [undefined] # gpg: WARNING: This key is not certified with a trusted signature! # gpg: There is no indication that the signature belongs to the owner. # Primary key fingerprint: A0F6 6548 F048 95EB FE6B 0B60 51A3 43C7 CFFB ECA1 * tag 'pull-vfio-20240723-1' of https://github.com/legoater/qemu: vfio/common: Allow disabling device dirty page tracking vfio/migration: Don't block migration device dirty tracking is unsupported vfio/iommufd: Implement VFIOIOMMUClass::query_dirty_bitmap support vfio/iommufd: Implement VFIOIOMMUClass::set_dirty_tracking support vfio/iommufd: Probe and request hwpt dirty tracking capability vfio/{iommufd, container}: Invoke HostIOMMUDevice::realize() during attach_device() vfio/iommufd: Add hw_caps field to HostIOMMUDeviceCaps vfio/{iommufd,container}: Remove caps::aw_bits vfio/iommufd: Introduce auto domain creation vfio/ccw: Don't initialize HOST_IOMMU_DEVICE with mdev vfio/ap: Don't initialize HOST_IOMMU_DEVICE with mdev vfio/iommufd: Return errno in iommufd_cdev_attach_ioas_hwpt() backends/iommufd: Extend iommufd_backend_get_device_info() to fetch HW capabilities vfio/iommufd: Don't initialize nor set a HOST_IOMMU_DEVICE with mdev vfio/pci: Extract mdev check into an helper hw/vfio/container: Fix SIGSEV on vfio_container_instance_finalize() Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-rw-r--r--backends/iommufd.c89
-rw-r--r--backends/trace-events3
-rw-r--r--hw/vfio/ap.c3
-rw-r--r--hw/vfio/ccw.c3
-rw-r--r--hw/vfio/common.c17
-rw-r--r--hw/vfio/container.c10
-rw-r--r--hw/vfio/helpers.c25
-rw-r--r--hw/vfio/iommufd.c196
-rw-r--r--hw/vfio/migration.c12
-rw-r--r--hw/vfio/pci.c26
-rw-r--r--include/hw/vfio/vfio-common.h15
-rw-r--r--include/sysemu/host_iommu_device.h5
-rw-r--r--include/sysemu/iommufd.h13
13 files changed, 377 insertions, 40 deletions
diff --git a/backends/iommufd.c b/backends/iommufd.c
index cabd1b5..9bc466a 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -18,6 +18,7 @@
#include "qemu/error-report.h"
#include "monitor/monitor.h"
#include "trace.h"
+#include "hw/vfio/vfio-common.h"
#include <sys/ioctl.h>
#include <linux/iommufd.h>
@@ -207,9 +208,91 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
return ret;
}
+bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id,
+ uint32_t pt_id, uint32_t flags,
+ uint32_t data_type, uint32_t data_len,
+ void *data_ptr, uint32_t *out_hwpt,
+ Error **errp)
+{
+ int ret, fd = be->fd;
+ struct iommu_hwpt_alloc alloc_hwpt = {
+ .size = sizeof(struct iommu_hwpt_alloc),
+ .flags = flags,
+ .dev_id = dev_id,
+ .pt_id = pt_id,
+ .data_type = data_type,
+ .data_len = data_len,
+ .data_uptr = (uintptr_t)data_ptr,
+ };
+
+ ret = ioctl(fd, IOMMU_HWPT_ALLOC, &alloc_hwpt);
+ trace_iommufd_backend_alloc_hwpt(fd, dev_id, pt_id, flags, data_type,
+ data_len, (uintptr_t)data_ptr,
+ alloc_hwpt.out_hwpt_id, ret);
+ if (ret) {
+ error_setg_errno(errp, errno, "Failed to allocate hwpt");
+ return false;
+ }
+
+ *out_hwpt = alloc_hwpt.out_hwpt_id;
+ return true;
+}
+
+bool iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be,
+ uint32_t hwpt_id, bool start,
+ Error **errp)
+{
+ int ret;
+ struct iommu_hwpt_set_dirty_tracking set_dirty = {
+ .size = sizeof(set_dirty),
+ .hwpt_id = hwpt_id,
+ .flags = start ? IOMMU_HWPT_DIRTY_TRACKING_ENABLE : 0,
+ };
+
+ ret = ioctl(be->fd, IOMMU_HWPT_SET_DIRTY_TRACKING, &set_dirty);
+ trace_iommufd_backend_set_dirty(be->fd, hwpt_id, start, ret ? errno : 0);
+ if (ret) {
+ error_setg_errno(errp, errno,
+ "IOMMU_HWPT_SET_DIRTY_TRACKING(hwpt_id %u) failed",
+ hwpt_id);
+ return false;
+ }
+
+ return true;
+}
+
+bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be,
+ uint32_t hwpt_id,
+ uint64_t iova, ram_addr_t size,
+ uint64_t page_size, uint64_t *data,
+ Error **errp)
+{
+ int ret;
+ struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap = {
+ .size = sizeof(get_dirty_bitmap),
+ .hwpt_id = hwpt_id,
+ .iova = iova,
+ .length = size,
+ .page_size = page_size,
+ .data = (uintptr_t)data,
+ };
+
+ ret = ioctl(be->fd, IOMMU_HWPT_GET_DIRTY_BITMAP, &get_dirty_bitmap);
+ trace_iommufd_backend_get_dirty_bitmap(be->fd, hwpt_id, iova, size,
+ page_size, ret ? errno : 0);
+ if (ret) {
+ error_setg_errno(errp, errno,
+ "IOMMU_HWPT_GET_DIRTY_BITMAP (iova: 0x%"HWADDR_PRIx
+ " size: 0x"RAM_ADDR_FMT") failed", iova, size);
+ return false;
+ }
+
+ return true;
+}
+
bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
uint32_t *type, void *data, uint32_t len,
- Error **errp)
+ uint64_t *caps, Error **errp)
{
struct iommu_hw_info info = {
.size = sizeof(info),
@@ -225,6 +308,8 @@ bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
g_assert(type);
*type = info.out_data_type;
+ g_assert(caps);
+ *caps = info.out_capabilities;
return true;
}
@@ -237,7 +322,7 @@ static int hiod_iommufd_get_cap(HostIOMMUDevice *hiod, int cap, Error **errp)
case HOST_IOMMU_DEVICE_CAP_IOMMU_TYPE:
return caps->type;
case HOST_IOMMU_DEVICE_CAP_AW_BITS:
- return caps->aw_bits;
+ return vfio_device_get_aw_bits(hiod->agent);
default:
error_setg(errp, "%s: unsupported capability %x", hiod->name, cap);
return -EINVAL;
diff --git a/backends/trace-events b/backends/trace-events
index 211e6f3..40811a3 100644
--- a/backends/trace-events
+++ b/backends/trace-events
@@ -14,4 +14,7 @@ iommufd_backend_map_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size
iommufd_backend_unmap_dma_non_exist(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " Unmap nonexistent mapping: iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)"
iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)"
iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas) " iommufd=%d ioas=%d"
+iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, uint32_t pt_id, uint32_t flags, uint32_t hwpt_type, uint32_t len, uint64_t data_ptr, uint32_t out_hwpt_id, int ret) " iommufd=%d dev_id=%u pt_id=%u flags=0x%x hwpt_type=%u len=%u data_ptr=0x%"PRIx64" out_hwpt=%u (%d)"
iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)"
+iommufd_backend_set_dirty(int iommufd, uint32_t hwpt_id, bool start, int ret) " iommufd=%d hwpt=%u enable=%d (%d)"
+iommufd_backend_get_dirty_bitmap(int iommufd, uint32_t hwpt_id, uint64_t iova, uint64_t size, uint64_t page_size, int ret) " iommufd=%d hwpt=%u iova=0x%"PRIx64" size=0x%"PRIx64" page_size=0x%"PRIx64" (%d)"
diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c
index 0c4354e..71bf32b 100644
--- a/hw/vfio/ap.c
+++ b/hw/vfio/ap.c
@@ -230,6 +230,9 @@ static void vfio_ap_instance_init(Object *obj)
*/
vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_AP, &vfio_ap_ops,
DEVICE(vapdev), true);
+
+ /* AP device is mdev type device */
+ vbasedev->mdev = true;
}
#ifdef CONFIG_IOMMUFD
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index 1f8e127..115862f 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -675,6 +675,9 @@ static void vfio_ccw_instance_init(Object *obj)
VFIOCCWDevice *vcdev = VFIO_CCW(obj);
VFIODevice *vbasedev = &vcdev->vdev;
+ /* CCW device is mdev type device */
+ vbasedev->mdev = true;
+
/*
* All vfio-ccw devices are believed to operate in a way compatible with
* discarding of memory in RAM blocks, ie. pages pinned in the host are
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index cfc44a4..36d0cf6 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -199,6 +199,9 @@ bool vfio_devices_all_device_dirty_tracking(const VFIOContainerBase *bcontainer)
VFIODevice *vbasedev;
QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
+ if (vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) {
+ return false;
+ }
if (!vbasedev->dirty_pages_supported) {
return false;
}
@@ -1537,7 +1540,7 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev,
{
const VFIOIOMMUClass *ops =
VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY));
- HostIOMMUDevice *hiod;
+ HostIOMMUDevice *hiod = NULL;
if (vbasedev->iommufd) {
ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
@@ -1545,17 +1548,17 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev,
assert(ops);
- if (!ops->attach_device(name, vbasedev, as, errp)) {
- return false;
+
+ if (!vbasedev->mdev) {
+ hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
+ vbasedev->hiod = hiod;
}
- hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
- if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp)) {
+ if (!ops->attach_device(name, vbasedev, as, errp)) {
object_unref(hiod);
- ops->detach_device(vbasedev);
+ vbasedev->hiod = NULL;
return false;
}
- vbasedev->hiod = hiod;
return true;
}
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 38a9df3..9ccdb63 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -656,7 +656,6 @@ static bool vfio_connect_container(VFIOGroup *group, AddressSpace *as,
return true;
listener_release_exit:
QLIST_REMOVE(group, container_next);
- QLIST_REMOVE(bcontainer, next);
vfio_kvm_device_del_group(group);
memory_listener_unregister(&bcontainer->listener);
if (vioc->release) {
@@ -915,6 +914,10 @@ static bool vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev,
trace_vfio_attach_device(vbasedev->name, groupid);
+ if (!vfio_device_hiod_realize(vbasedev, errp)) {
+ return false;
+ }
+
group = vfio_get_group(groupid, as, errp);
if (!group) {
return false;
@@ -1142,7 +1145,6 @@ static bool hiod_legacy_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
VFIODevice *vdev = opaque;
hiod->name = g_strdup(vdev->name);
- hiod->caps.aw_bits = vfio_device_get_aw_bits(vdev);
hiod->agent = opaque;
return true;
@@ -1151,11 +1153,9 @@ static bool hiod_legacy_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
static int hiod_legacy_vfio_get_cap(HostIOMMUDevice *hiod, int cap,
Error **errp)
{
- HostIOMMUDeviceCaps *caps = &hiod->caps;
-
switch (cap) {
case HOST_IOMMU_DEVICE_CAP_AW_BITS:
- return caps->aw_bits;
+ return vfio_device_get_aw_bits(hiod->agent);
default:
error_setg(errp, "%s: unsupported capability %x", hiod->name, cap);
return -EINVAL;
diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
index b14edd4..ea15c79 100644
--- a/hw/vfio/helpers.c
+++ b/hw/vfio/helpers.c
@@ -675,3 +675,28 @@ int vfio_device_get_aw_bits(VFIODevice *vdev)
return HOST_IOMMU_DEVICE_CAP_AW_BITS_MAX;
}
+
+bool vfio_device_is_mdev(VFIODevice *vbasedev)
+{
+ g_autofree char *subsys = NULL;
+ g_autofree char *tmp = NULL;
+
+ if (!vbasedev->sysfsdev) {
+ return false;
+ }
+
+ tmp = g_strdup_printf("%s/subsystem", vbasedev->sysfsdev);
+ subsys = realpath(tmp, NULL);
+ return subsys && (strcmp(subsys, "/sys/bus/mdev") == 0);
+}
+
+bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp)
+{
+ HostIOMMUDevice *hiod = vbasedev->hiod;
+
+ if (!hiod) {
+ return true;
+ }
+
+ return HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp);
+}
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 7b5f87a..e7bece4 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -25,6 +25,7 @@
#include "qemu/cutils.h"
#include "qemu/chardev_open.h"
#include "pci.h"
+#include "exec/ram_addr.h"
static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova,
ram_addr_t size, void *vaddr, bool readonly)
@@ -110,6 +111,68 @@ static void iommufd_cdev_unbind_and_disconnect(VFIODevice *vbasedev)
iommufd_backend_disconnect(vbasedev->iommufd);
}
+static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
+{
+ return hwpt && hwpt->hwpt_flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
+}
+
+static int iommufd_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
+ bool start, Error **errp)
+{
+ const VFIOIOMMUFDContainer *container =
+ container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
+ VFIOIOASHwpt *hwpt;
+
+ QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
+ if (!iommufd_hwpt_dirty_tracking(hwpt)) {
+ continue;
+ }
+
+ if (!iommufd_backend_set_dirty_tracking(container->be,
+ hwpt->hwpt_id, start, errp)) {
+ goto err;
+ }
+ }
+
+ return 0;
+
+err:
+ QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
+ if (!iommufd_hwpt_dirty_tracking(hwpt)) {
+ continue;
+ }
+ iommufd_backend_set_dirty_tracking(container->be,
+ hwpt->hwpt_id, !start, NULL);
+ }
+ return -EINVAL;
+}
+
+static int iommufd_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
+ VFIOBitmap *vbmap, hwaddr iova,
+ hwaddr size, Error **errp)
+{
+ VFIOIOMMUFDContainer *container = container_of(bcontainer,
+ VFIOIOMMUFDContainer,
+ bcontainer);
+ unsigned long page_size = qemu_real_host_page_size();
+ VFIOIOASHwpt *hwpt;
+
+ QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
+ if (!iommufd_hwpt_dirty_tracking(hwpt)) {
+ continue;
+ }
+
+ if (!iommufd_backend_get_dirty_bitmap(container->be, hwpt->hwpt_id,
+ iova, size, page_size,
+ (uint64_t *)vbmap->bitmap,
+ errp)) {
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp)
{
ERRP_GUARD();
@@ -172,7 +235,7 @@ out:
return ret;
}
-static bool iommufd_cdev_attach_ioas_hwpt(VFIODevice *vbasedev, uint32_t id,
+static int iommufd_cdev_attach_ioas_hwpt(VFIODevice *vbasedev, uint32_t id,
Error **errp)
{
int iommufd = vbasedev->iommufd->fd;
@@ -187,12 +250,12 @@ static bool iommufd_cdev_attach_ioas_hwpt(VFIODevice *vbasedev, uint32_t id,
error_setg_errno(errp, errno,
"[iommufd=%d] error attach %s (%d) to id=%d",
iommufd, vbasedev->name, vbasedev->fd, id);
- return false;
+ return -errno;
}
trace_iommufd_cdev_attach_ioas_hwpt(iommufd, vbasedev->name,
vbasedev->fd, id);
- return true;
+ return 0;
}
static bool iommufd_cdev_detach_ioas_hwpt(VFIODevice *vbasedev, Error **errp)
@@ -212,11 +275,111 @@ static bool iommufd_cdev_detach_ioas_hwpt(VFIODevice *vbasedev, Error **errp)
return true;
}
+static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
+ VFIOIOMMUFDContainer *container,
+ Error **errp)
+{
+ ERRP_GUARD();
+ IOMMUFDBackend *iommufd = vbasedev->iommufd;
+ uint32_t flags = 0;
+ VFIOIOASHwpt *hwpt;
+ uint32_t hwpt_id;
+ int ret;
+
+ /* Try to find a domain */
+ QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
+ ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
+ if (ret) {
+ /* -EINVAL means the domain is incompatible with the device. */
+ if (ret == -EINVAL) {
+ /*
+ * It is an expected failure and it just means we will try
+ * another domain, or create one if no existing compatible
+ * domain is found. Hence why the error is discarded below.
+ */
+ error_free(*errp);
+ *errp = NULL;
+ continue;
+ }
+
+ return false;
+ } else {
+ vbasedev->hwpt = hwpt;
+ QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
+ vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt);
+ return true;
+ }
+ }
+
+ /*
+ * This is quite early and VFIO Migration state isn't yet fully
+ * initialized, thus rely only on IOMMU hardware capabilities as to
+ * whether IOMMU dirty tracking is going to be requested. Later
+ * vfio_migration_realize() may decide to use VF dirty tracking
+ * instead.
+ */
+ if (vbasedev->hiod->caps.hw_caps & IOMMU_HW_CAP_DIRTY_TRACKING) {
+ flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
+ }
+
+ if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid,
+ container->ioas_id, flags,
+ IOMMU_HWPT_DATA_NONE, 0, NULL,
+ &hwpt_id, errp)) {
+ return false;
+ }
+
+ hwpt = g_malloc0(sizeof(*hwpt));
+ hwpt->hwpt_id = hwpt_id;
+ hwpt->hwpt_flags = flags;
+ QLIST_INIT(&hwpt->device_list);
+
+ ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
+ if (ret) {
+ iommufd_backend_free_id(container->be, hwpt->hwpt_id);
+ g_free(hwpt);
+ return false;
+ }
+
+ vbasedev->hwpt = hwpt;
+ vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt);
+ QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
+ QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next);
+ container->bcontainer.dirty_pages_supported |=
+ vbasedev->iommu_dirty_tracking;
+ if (container->bcontainer.dirty_pages_supported &&
+ !vbasedev->iommu_dirty_tracking) {
+ warn_report("IOMMU instance for device %s doesn't support dirty tracking",
+ vbasedev->name);
+ }
+ return true;
+}
+
+static void iommufd_cdev_autodomains_put(VFIODevice *vbasedev,
+ VFIOIOMMUFDContainer *container)
+{
+ VFIOIOASHwpt *hwpt = vbasedev->hwpt;
+
+ QLIST_REMOVE(vbasedev, hwpt_next);
+ vbasedev->hwpt = NULL;
+
+ if (QLIST_EMPTY(&hwpt->device_list)) {
+ QLIST_REMOVE(hwpt, next);
+ iommufd_backend_free_id(container->be, hwpt->hwpt_id);
+ g_free(hwpt);
+ }
+}
+
static bool iommufd_cdev_attach_container(VFIODevice *vbasedev,
VFIOIOMMUFDContainer *container,
Error **errp)
{
- return iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp);
+ /* mdevs aren't physical devices and will fail with auto domains */
+ if (!vbasedev->mdev) {
+ return iommufd_cdev_autodomains_get(vbasedev, container, errp);
+ }
+
+ return !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp);
}
static void iommufd_cdev_detach_container(VFIODevice *vbasedev,
@@ -227,6 +390,11 @@ static void iommufd_cdev_detach_container(VFIODevice *vbasedev,
if (!iommufd_cdev_detach_ioas_hwpt(vbasedev, &err)) {
error_report_err(err);
}
+
+ if (vbasedev->hwpt) {
+ iommufd_cdev_autodomains_put(vbasedev, container);
+ }
+
}
static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container)
@@ -320,6 +488,17 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
space = vfio_get_address_space(as);
+ /*
+ * The HostIOMMUDevice data from legacy backend is static and doesn't need
+ * any information from the (type1-iommu) backend to be initialized. In
+ * contrast however, the IOMMUFD HostIOMMUDevice data requires the iommufd
+ * FD to be connected and having a devid to be able to successfully call
+ * iommufd_backend_get_device_info().
+ */
+ if (!vfio_device_hiod_realize(vbasedev, errp)) {
+ goto err_alloc_ioas;
+ }
+
/* try to attach to an existing container in this space */
QLIST_FOREACH(bcontainer, &space->containers, next) {
container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
@@ -354,6 +533,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
container = VFIO_IOMMU_IOMMUFD(object_new(TYPE_VFIO_IOMMU_IOMMUFD));
container->be = vbasedev->iommufd;
container->ioas_id = ioas_id;
+ QLIST_INIT(&container->hwpt_list);
bcontainer = &container->bcontainer;
vfio_address_space_insert(space, bcontainer);
@@ -617,6 +797,8 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data)
vioc->attach_device = iommufd_cdev_attach;
vioc->detach_device = iommufd_cdev_detach;
vioc->pci_hot_reset = iommufd_cdev_pci_hot_reset;
+ vioc->set_dirty_page_tracking = iommufd_set_dirty_page_tracking;
+ vioc->query_dirty_bitmap = iommufd_query_dirty_bitmap;
};
static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
@@ -628,17 +810,19 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
union {
struct iommu_hw_info_vtd vtd;
} data;
+ uint64_t hw_caps;
hiod->agent = opaque;
if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid,
- &type, &data, sizeof(data), errp)) {
+ &type, &data, sizeof(data),
+ &hw_caps, errp)) {
return false;
}
hiod->name = g_strdup(vdev->name);
caps->type = type;
- caps->aw_bits = vfio_device_get_aw_bits(vdev);
+ caps->hw_caps = hw_caps;
return true;
}
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 34d4be2..262d42a 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -1036,16 +1036,18 @@ bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
return !vfio_block_migration(vbasedev, err, errp);
}
- if (!vbasedev->dirty_pages_supported) {
+ if ((!vbasedev->dirty_pages_supported ||
+ vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) &&
+ !vbasedev->iommu_dirty_tracking) {
if (vbasedev->enable_migration == ON_OFF_AUTO_AUTO) {
error_setg(&err,
- "%s: VFIO device doesn't support device dirty tracking",
- vbasedev->name);
+ "%s: VFIO device doesn't support device and "
+ "IOMMU dirty tracking", vbasedev->name);
goto add_blocker;
}
- warn_report("%s: VFIO device doesn't support device dirty tracking",
- vbasedev->name);
+ warn_report("%s: VFIO device doesn't support device and "
+ "IOMMU dirty tracking", vbasedev->name);
}
ret = vfio_block_multiple_devices_migration(vbasedev, errp);
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index e03d9f3..2407720 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2963,12 +2963,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
ERRP_GUARD();
VFIOPCIDevice *vdev = VFIO_PCI(pdev);
VFIODevice *vbasedev = &vdev->vbasedev;
- char *subsys;
int i, ret;
- bool is_mdev;
char uuid[UUID_STR_LEN];
g_autofree char *name = NULL;
- g_autofree char *tmp = NULL;
if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
if (!(~vdev->host.domain || ~vdev->host.bus ||
@@ -2997,14 +2994,11 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
* stays in sync with the active working set of the guest driver. Prevent
* the x-balloon-allowed option unless this is minimally an mdev device.
*/
- tmp = g_strdup_printf("%s/subsystem", vbasedev->sysfsdev);
- subsys = realpath(tmp, NULL);
- is_mdev = subsys && (strcmp(subsys, "/sys/bus/mdev") == 0);
- free(subsys);
+ vbasedev->mdev = vfio_device_is_mdev(vbasedev);
- trace_vfio_mdev(vbasedev->name, is_mdev);
+ trace_vfio_mdev(vbasedev->name, vbasedev->mdev);
- if (vbasedev->ram_block_discard_allowed && !is_mdev) {
+ if (vbasedev->ram_block_discard_allowed && !vbasedev->mdev) {
error_setg(errp, "x-balloon-allowed only potentially compatible "
"with mdev devices");
goto error;
@@ -3121,7 +3115,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
vfio_bars_register(vdev);
- if (!pci_device_set_iommu_device(pdev, vbasedev->hiod, errp)) {
+ if (!vbasedev->mdev &&
+ !pci_device_set_iommu_device(pdev, vbasedev->hiod, errp)) {
error_prepend(errp, "Failed to set iommu_device: ");
goto out_teardown;
}
@@ -3244,7 +3239,9 @@ out_deregister:
timer_free(vdev->intx.mmap_timer);
}
out_unset_idev:
- pci_device_unset_iommu_device(pdev);
+ if (!vbasedev->mdev) {
+ pci_device_unset_iommu_device(pdev);
+ }
out_teardown:
vfio_teardown_msi(vdev);
vfio_bars_exit(vdev);
@@ -3289,7 +3286,9 @@ static void vfio_exitfn(PCIDevice *pdev)
vfio_pci_disable_rp_atomics(vdev);
vfio_bars_exit(vdev);
vfio_migration_exit(vbasedev);
- pci_device_unset_iommu_device(pdev);
+ if (!vbasedev->mdev) {
+ pci_device_unset_iommu_device(pdev);
+ }
}
static void vfio_pci_reset(DeviceState *dev)
@@ -3362,6 +3361,9 @@ static Property vfio_pci_dev_properties[] = {
DEFINE_PROP_ON_OFF_AUTO("x-pre-copy-dirty-page-tracking", VFIOPCIDevice,
vbasedev.pre_copy_dirty_page_tracking,
ON_OFF_AUTO_ON),
+ DEFINE_PROP_ON_OFF_AUTO("x-device-dirty-page-tracking", VFIOPCIDevice,
+ vbasedev.device_dirty_page_tracking,
+ ON_OFF_AUTO_ON),
DEFINE_PROP_ON_OFF_AUTO("display", VFIOPCIDevice,
display, ON_OFF_AUTO_OFF),
DEFINE_PROP_UINT32("xres", VFIOPCIDevice, display_xres, 0),
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index e8ddf92..fed499b 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -95,10 +95,18 @@ typedef struct VFIOHostDMAWindow {
typedef struct IOMMUFDBackend IOMMUFDBackend;
+typedef struct VFIOIOASHwpt {
+ uint32_t hwpt_id;
+ uint32_t hwpt_flags;
+ QLIST_HEAD(, VFIODevice) device_list;
+ QLIST_ENTRY(VFIOIOASHwpt) next;
+} VFIOIOASHwpt;
+
typedef struct VFIOIOMMUFDContainer {
VFIOContainerBase bcontainer;
IOMMUFDBackend *be;
uint32_t ioas_id;
+ QLIST_HEAD(, VFIOIOASHwpt) hwpt_list;
} VFIOIOMMUFDContainer;
OBJECT_DECLARE_SIMPLE_TYPE(VFIOIOMMUFDContainer, VFIO_IOMMU_IOMMUFD);
@@ -116,6 +124,7 @@ typedef struct VFIODevice {
DeviceState *dev;
int fd;
int type;
+ bool mdev;
bool reset_works;
bool needs_reset;
bool no_mmap;
@@ -129,11 +138,15 @@ typedef struct VFIODevice {
VFIOMigration *migration;
Error *migration_blocker;
OnOffAuto pre_copy_dirty_page_tracking;
+ OnOffAuto device_dirty_page_tracking;
bool dirty_pages_supported;
bool dirty_tracking;
+ bool iommu_dirty_tracking;
HostIOMMUDevice *hiod;
int devid;
IOMMUFDBackend *iommufd;
+ VFIOIOASHwpt *hwpt;
+ QLIST_ENTRY(VFIODevice) hwpt_next;
} VFIODevice;
struct VFIODeviceOps {
@@ -231,6 +244,8 @@ void vfio_region_exit(VFIORegion *region);
void vfio_region_finalize(VFIORegion *region);
void vfio_reset_handler(void *opaque);
struct vfio_device_info *vfio_get_device_info(int fd);
+bool vfio_device_is_mdev(VFIODevice *vbasedev);
+bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp);
bool vfio_attach_device(char *name, VFIODevice *vbasedev,
AddressSpace *as, Error **errp);
void vfio_detach_device(VFIODevice *vbasedev);
diff --git a/include/sysemu/host_iommu_device.h b/include/sysemu/host_iommu_device.h
index c1bf74a..809cced 100644
--- a/include/sysemu/host_iommu_device.h
+++ b/include/sysemu/host_iommu_device.h
@@ -20,11 +20,12 @@
*
* @type: host platform IOMMU type.
*
- * @aw_bits: host IOMMU address width. 0xff if no limitation.
+ * @hw_caps: host platform IOMMU capabilities (e.g. on IOMMUFD this represents
+ * the @out_capabilities value returned from IOMMU_GET_HW_INFO ioctl)
*/
typedef struct HostIOMMUDeviceCaps {
uint32_t type;
- uint8_t aw_bits;
+ uint64_t hw_caps;
} HostIOMMUDeviceCaps;
#define TYPE_HOST_IOMMU_DEVICE "host-iommu-device"
diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
index 9edfec6..4c4886c 100644
--- a/include/sysemu/iommufd.h
+++ b/include/sysemu/iommufd.h
@@ -49,7 +49,18 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
hwaddr iova, ram_addr_t size);
bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
uint32_t *type, void *data, uint32_t len,
- Error **errp);
+ uint64_t *caps, Error **errp);
+bool iommufd_backend_alloc_hwpt(IOMMUFDBackend *be, uint32_t dev_id,
+ uint32_t pt_id, uint32_t flags,
+ uint32_t data_type, uint32_t data_len,
+ void *data_ptr, uint32_t *out_hwpt,
+ Error **errp);
+bool iommufd_backend_set_dirty_tracking(IOMMUFDBackend *be, uint32_t hwpt_id,
+ bool start, Error **errp);
+bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id,
+ uint64_t iova, ram_addr_t size,
+ uint64_t page_size, uint64_t *data,
+ Error **errp);
#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"
#endif