diff options
author | Thanos Makatos <thanos.makatos@nutanix.com> | 2022-07-04 12:16:08 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-07-04 12:16:08 +0100 |
commit | 36beb63be45ad1412562a98d9373a4c0bd91ab3d (patch) | |
tree | 61bb44f0befc0055292b120909251c0fc2d27b0e /lib | |
parent | 1c274027bb4f9d68eee846036e8d50dcde2fd7e9 (diff) | |
download | libvfio-user-36beb63be45ad1412562a98d9373a4c0bd91ab3d.zip libvfio-user-36beb63be45ad1412562a98d9373a4c0bd91ab3d.tar.gz libvfio-user-36beb63be45ad1412562a98d9373a4c0bd91ab3d.tar.bz2 |
support for shadow ioeventfd (#698)
When an ioeventfd is written to, KVM discards the value since it has no
memory to write it to, and simply kicks the eventfd. This is a problem for
devices such as NVMe controllers that need the value (e.g. doorbells on
BAR0). This patch allows the vfio-user server to pass a file descriptor
that can be mmap'ed, so that KVM can write the ioeventfd value to this
_shadow_ memory instead of discarding it. This shadow memory is not
exposed to the guest.
Signed-off-by: Thanos Makatos <thanos.makatos@nutanix.com>
Reviewed-by: John Levon <john.levon@nutanix.com>
Change-Id: Iad849c94076ffa5988e034c8bf7ec312d01f095f
Diffstat (limited to 'lib')
-rw-r--r-- | lib/libvfio-user.c | 24 | ||||
-rw-r--r-- | lib/private.h | 1 |
2 files changed, 22 insertions, 3 deletions
diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c index ac04d3b..5ce5767 100644 --- a/lib/libvfio-user.c +++ b/lib/libvfio-user.c @@ -467,13 +467,19 @@ handle_device_get_region_info(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) EXPORT int vfu_create_ioeventfd(vfu_ctx_t *vfu_ctx, uint32_t region_idx, int fd, size_t offset, uint32_t size, uint32_t flags, - uint64_t datamatch) + uint64_t datamatch, int shadow_fd) { vfu_reg_info_t *vfu_reg; assert(vfu_ctx != NULL); assert(fd >= 0); +#ifndef SHADOW_IOEVENTFD + if (shadow_fd != -1) { + return ERROR_INT(EINVAL); + } +#endif + if (region_idx >= VFU_PCI_DEV_NUM_REGIONS) { return ERROR_INT(EINVAL); } @@ -494,6 +500,7 @@ vfu_create_ioeventfd(vfu_ctx_t *vfu_ctx, uint32_t region_idx, int fd, elem->size = size; elem->flags = flags; elem->datamatch = datamatch; + elem->shadow_fd = shadow_fd; LIST_INSERT_HEAD(&vfu_reg->subregions, elem, entry); return 0; @@ -555,6 +562,7 @@ handle_device_get_region_io_fds(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) ioeventfd_t *sub_reg = NULL; size_t nr_sub_reg = 0; size_t i = 0; + size_t nr_shadow_reg = 0; assert(vfu_ctx != NULL); assert(msg != NULL); @@ -585,6 +593,9 @@ handle_device_get_region_io_fds(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) LIST_FOREACH(sub_reg, &vfu_reg->subregions, entry) { nr_sub_reg++; + if (sub_reg->shadow_fd != -1) { + nr_shadow_reg++; + } } if (req->argsz < sizeof(vfio_user_region_io_fds_reply_t) || @@ -614,7 +625,8 @@ handle_device_get_region_io_fds(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) msg->out.nr_fds = 0; if (req->argsz >= reply->argsz) { - msg->out.fds = calloc(sizeof(int), max_sent_sub_regions); + msg->out.fds = calloc(sizeof(int), + max_sent_sub_regions + nr_shadow_reg); if (msg->out.fds == NULL) { return -1; } @@ -627,7 +639,13 @@ handle_device_get_region_io_fds(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) ioefd->size = sub_reg->size; ioefd->fd_index = add_fd_index(msg->out.fds, &msg->out.nr_fds, sub_reg->fd); - ioefd->type = VFIO_USER_IO_FD_TYPE_IOEVENTFD; + if (sub_reg->shadow_fd == -1) { + 
ioefd->type = VFIO_USER_IO_FD_TYPE_IOEVENTFD; + } else { + ioefd->type = VFIO_USER_IO_FD_TYPE_IOEVENTFD_SHADOW; + int ret = add_fd_index(msg->out.fds, &msg->out.nr_fds, sub_reg->shadow_fd); + assert(ret == 1); + } ioefd->flags = sub_reg->flags; ioefd->datamatch = sub_reg->datamatch; diff --git a/lib/private.h b/lib/private.h index 7ffd6be..b875138 100644 --- a/lib/private.h +++ b/lib/private.h @@ -186,6 +186,7 @@ typedef struct ioeventfd { int32_t fd; uint32_t flags; uint64_t datamatch; + int32_t shadow_fd; LIST_ENTRY(ioeventfd) entry; } ioeventfd_t; |