diff options
author | Eric Auger <eric.auger@redhat.com> | 2023-11-21 16:44:00 +0800 |
---|---|---|
committer | Cédric Le Goater <clg@redhat.com> | 2023-12-19 19:03:38 +0100 |
commit | 6e6d8ac62b5b38dc9d4b69ffdf073f0a0b43b7be (patch) | |
tree | ed0a5d7b3b5ba608a2d7b2dd84b03f37c204d219 /backends | |
parent | dbb9d0c9691d145338686d3e0920da047f2ab3da (diff) | |
download | qemu-6e6d8ac62b5b38dc9d4b69ffdf073f0a0b43b7be.zip qemu-6e6d8ac62b5b38dc9d4b69ffdf073f0a0b43b7be.tar.gz qemu-6e6d8ac62b5b38dc9d4b69ffdf073f0a0b43b7be.tar.bz2 |
backends/iommufd: Introduce the iommufd object
Introduce an iommufd object which allows the interaction
with the host /dev/iommu device.
The /dev/iommu can have been already pre-opened outside of qemu,
in which case the fd can be passed directly along with the
iommufd object:
This allows the iommufd object to be shared accross several
subsystems (VFIO, VDPA, ...). For example, libvirt would open
the /dev/iommu once.
If no fd is passed along with the iommufd object, the /dev/iommu
is opened by the qemu code.
Suggested-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
Diffstat (limited to 'backends')
-rw-r--r-- | backends/Kconfig | 4 | ||||
-rw-r--r-- | backends/iommufd.c | 245 | ||||
-rw-r--r-- | backends/meson.build | 1 | ||||
-rw-r--r-- | backends/trace-events | 10 |
4 files changed, 260 insertions, 0 deletions
diff --git a/backends/Kconfig b/backends/Kconfig index f35abc1..2cb23f6 100644 --- a/backends/Kconfig +++ b/backends/Kconfig @@ -1 +1,5 @@ source tpm/Kconfig + +config IOMMUFD + bool + depends on VFIO diff --git a/backends/iommufd.c b/backends/iommufd.c new file mode 100644 index 0000000..ba58a0e --- /dev/null +++ b/backends/iommufd.c @@ -0,0 +1,245 @@ +/* + * iommufd container backend + * + * Copyright (C) 2023 Intel Corporation. + * Copyright Red Hat, Inc. 2023 + * + * Authors: Yi Liu <yi.l.liu@intel.com> + * Eric Auger <eric.auger@redhat.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "sysemu/iommufd.h" +#include "qapi/error.h" +#include "qapi/qmp/qerror.h" +#include "qemu/module.h" +#include "qom/object_interfaces.h" +#include "qemu/error-report.h" +#include "monitor/monitor.h" +#include "trace.h" +#include <sys/ioctl.h> +#include <linux/iommufd.h> + +static void iommufd_backend_init(Object *obj) +{ + IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); + + be->fd = -1; + be->users = 0; + be->owned = true; + qemu_mutex_init(&be->lock); +} + +static void iommufd_backend_finalize(Object *obj) +{ + IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); + + if (be->owned) { + close(be->fd); + be->fd = -1; + } +} + +static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp) +{ + IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); + int fd = -1; + + fd = monitor_fd_param(monitor_cur(), str, errp); + if (fd == -1) { + error_prepend(errp, "Could not parse remote object fd %s:", str); + return; + } + qemu_mutex_lock(&be->lock); + be->fd = fd; + be->owned = false; + qemu_mutex_unlock(&be->lock); + trace_iommu_backend_set_fd(be->fd); +} + +static bool iommufd_backend_can_be_deleted(UserCreatable *uc) +{ + IOMMUFDBackend *be = IOMMUFD_BACKEND(uc); + + return !be->users; +} + +static void iommufd_backend_class_init(ObjectClass *oc, void *data) +{ + UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); + + ucc->can_be_deleted = iommufd_backend_can_be_deleted; + + object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd); +} + +int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) +{ + int fd, ret = 0; + + qemu_mutex_lock(&be->lock); + if (be->users == UINT32_MAX) { + error_setg(errp, "too many connections"); + ret = -E2BIG; + goto out; + } + if (be->owned && !be->users) { + fd = qemu_open_old("/dev/iommu", O_RDWR); + if (fd < 0) { + error_setg_errno(errp, errno, "/dev/iommu opening failed"); + ret = fd; + goto out; + } + be->fd = fd; + } + be->users++; +out: + trace_iommufd_backend_connect(be->fd, be->owned, + be->users, ret); + qemu_mutex_unlock(&be->lock); + return ret; +} + +void iommufd_backend_disconnect(IOMMUFDBackend *be) +{ + qemu_mutex_lock(&be->lock); + if (!be->users) { + goto out; + } + be->users--; + if (!be->users && be->owned) { + close(be->fd); + be->fd = -1; + } +out: + trace_iommufd_backend_disconnect(be->fd, be->users); + qemu_mutex_unlock(&be->lock); +} + +int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, + Error **errp) +{ + int ret, fd = be->fd; + struct iommu_ioas_alloc alloc_data = { + .size = sizeof(alloc_data), + .flags = 0, + }; + + ret = ioctl(fd, IOMMU_IOAS_ALLOC, &alloc_data); + if (ret) { + error_setg_errno(errp, errno, "Failed to allocate ioas"); + return ret; + } + + *ioas_id = alloc_data.out_ioas_id; + trace_iommufd_backend_alloc_ioas(fd, *ioas_id, ret); + + return ret; +} + +void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id) +{ + int ret, fd = be->fd; + struct iommu_destroy des = { + .size = sizeof(des), + .id = id, + }; + + ret = ioctl(fd, IOMMU_DESTROY, &des); + trace_iommufd_backend_free_id(fd, id, ret); + if (ret) { + error_report("Failed to free id: %u %m", id); + } +} + +int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, + ram_addr_t size, void *vaddr, bool readonly) +{ + int ret, fd = be->fd; + struct iommu_ioas_map map = { + .size = sizeof(map), + .flags = IOMMU_IOAS_MAP_READABLE | + IOMMU_IOAS_MAP_FIXED_IOVA, + .ioas_id = ioas_id, + .__reserved = 0, + .user_va = (uintptr_t)vaddr, + .iova = iova, + .length = size, + }; + + if (!readonly) { + map.flags |= IOMMU_IOAS_MAP_WRITEABLE; + } + + ret = ioctl(fd, IOMMU_IOAS_MAP, &map); + trace_iommufd_backend_map_dma(fd, ioas_id, iova, size, + vaddr, readonly, ret); + if (ret) { + ret = -errno; + + /* TODO: Not support mapping hardware PCI BAR region for now. */ + if (errno == EFAULT) { + warn_report("IOMMU_IOAS_MAP failed: %m, PCI BAR?"); + } else { + error_report("IOMMU_IOAS_MAP failed: %m"); + } + } + return ret; +} + +int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, + hwaddr iova, ram_addr_t size) +{ + int ret, fd = be->fd; + struct iommu_ioas_unmap unmap = { + .size = sizeof(unmap), + .ioas_id = ioas_id, + .iova = iova, + .length = size, + }; + + ret = ioctl(fd, IOMMU_IOAS_UNMAP, &unmap); + /* + * IOMMUFD takes mapping as some kind of object, unmapping + * nonexistent mapping is treated as deleting a nonexistent + * object and return ENOENT. This is different from legacy + * backend which allows it. vIOMMU may trigger a lot of + * redundant unmapping, to avoid flush the log, treat them + * as succeess for IOMMUFD just like legacy backend. + */ + if (ret && errno == ENOENT) { + trace_iommufd_backend_unmap_dma_non_exist(fd, ioas_id, iova, size, ret); + ret = 0; + } else { + trace_iommufd_backend_unmap_dma(fd, ioas_id, iova, size, ret); + } + + if (ret) { + ret = -errno; + error_report("IOMMU_IOAS_UNMAP failed: %m"); + } + return ret; +} + +static const TypeInfo iommufd_backend_info = { + .name = TYPE_IOMMUFD_BACKEND, + .parent = TYPE_OBJECT, + .instance_size = sizeof(IOMMUFDBackend), + .instance_init = iommufd_backend_init, + .instance_finalize = iommufd_backend_finalize, + .class_size = sizeof(IOMMUFDBackendClass), + .class_init = iommufd_backend_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } +}; + +static void register_types(void) +{ + type_register_static(&iommufd_backend_info); +} + +type_init(register_types); diff --git a/backends/meson.build b/backends/meson.build index 914c7c4..9a5cea4 100644 --- a/backends/meson.build +++ b/backends/meson.build @@ -20,6 +20,7 @@ if have_vhost_user system_ss.add(when: 'CONFIG_VIRTIO', if_true: files('vhost-user.c')) endif system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost.c')) +system_ss.add(when: 'CONFIG_IOMMUFD', if_true: files('iommufd.c')) if have_vhost_user_crypto system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost-user.c')) endif diff --git a/backends/trace-events b/backends/trace-events index 652eb76..d45c6e3 100644 --- a/backends/trace-events +++ b/backends/trace-events @@ -5,3 +5,13 @@ dbus_vmstate_pre_save(void) dbus_vmstate_post_load(int version_id) "version_id: %d" dbus_vmstate_loading(const char *id) "id: %s" dbus_vmstate_saving(const char *id) "id: %s" + +# iommufd.c +iommufd_backend_connect(int fd, bool owned, uint32_t users, int ret) "fd=%d owned=%d users=%d (%d)" +iommufd_backend_disconnect(int fd, uint32_t users) "fd=%d users=%d" +iommu_backend_set_fd(int fd) "pre-opened /dev/iommu fd=%d" +iommufd_backend_map_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, void *vaddr, bool readonly, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" addr=%p readonly=%d (%d)" +iommufd_backend_unmap_dma_non_exist(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " Unmap nonexistent mapping: iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)" +iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)" +iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas, int ret) " iommufd=%d ioas=%d (%d)" +iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)" |