aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
author Thanos Makatos <thanos.makatos@nutanix.com> 2022-07-04 12:16:08 +0100
committer GitHub <noreply@github.com> 2022-07-04 12:16:08 +0100
commit 36beb63be45ad1412562a98d9373a4c0bd91ab3d (patch)
tree 61bb44f0befc0055292b120909251c0fc2d27b0e /lib
parent 1c274027bb4f9d68eee846036e8d50dcde2fd7e9 (diff)
download libvfio-user-36beb63be45ad1412562a98d9373a4c0bd91ab3d.zip
libvfio-user-36beb63be45ad1412562a98d9373a4c0bd91ab3d.tar.gz
libvfio-user-36beb63be45ad1412562a98d9373a4c0bd91ab3d.tar.bz2
support for shadow ioeventfd (#698)
When an ioeventfd is written to, KVM discards the value since it has no memory to write it to, and simply kicks the eventfd. This is a problem for devices such as NVMe controllers that need the value (e.g. doorbells on BAR0). This patch allows the vfio-user server to pass a file descriptor that can be mmap'ed, so that KVM can write the ioeventfd value to this _shadow_ memory instead of discarding it. This shadow memory is not exposed to the guest. Signed-off-by: Thanos Makatos <thanos.makatos@nutanix.com> Reviewed-by: John Levon <john.levon@nutanix.com> Change-Id: Iad849c94076ffa5988e034c8bf7ec312d01f095f
Diffstat (limited to 'lib')
-rw-r--r-- lib/libvfio-user.c 24
-rw-r--r-- lib/private.h 1
2 files changed, 22 insertions, 3 deletions
diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c
index ac04d3b..5ce5767 100644
--- a/lib/libvfio-user.c
+++ b/lib/libvfio-user.c
@@ -467,13 +467,19 @@ handle_device_get_region_info(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg)
EXPORT int
vfu_create_ioeventfd(vfu_ctx_t *vfu_ctx, uint32_t region_idx, int fd,
size_t offset, uint32_t size, uint32_t flags,
- uint64_t datamatch)
+ uint64_t datamatch, int shadow_fd)
{
vfu_reg_info_t *vfu_reg;
assert(vfu_ctx != NULL);
assert(fd >= 0);
+#ifndef SHADOW_IOEVENTFD
+ if (shadow_fd != -1) {
+ return ERROR_INT(EINVAL);
+ }
+#endif
+
if (region_idx >= VFU_PCI_DEV_NUM_REGIONS) {
return ERROR_INT(EINVAL);
}
@@ -494,6 +500,7 @@ vfu_create_ioeventfd(vfu_ctx_t *vfu_ctx, uint32_t region_idx, int fd,
elem->size = size;
elem->flags = flags;
elem->datamatch = datamatch;
+ elem->shadow_fd = shadow_fd;
LIST_INSERT_HEAD(&vfu_reg->subregions, elem, entry);
return 0;
@@ -555,6 +562,7 @@ handle_device_get_region_io_fds(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg)
ioeventfd_t *sub_reg = NULL;
size_t nr_sub_reg = 0;
size_t i = 0;
+ size_t nr_shadow_reg = 0;
assert(vfu_ctx != NULL);
assert(msg != NULL);
@@ -585,6 +593,9 @@ handle_device_get_region_io_fds(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg)
LIST_FOREACH(sub_reg, &vfu_reg->subregions, entry) {
nr_sub_reg++;
+ if (sub_reg->shadow_fd != -1) {
+ nr_shadow_reg++;
+ }
}
if (req->argsz < sizeof(vfio_user_region_io_fds_reply_t) ||
@@ -614,7 +625,8 @@ handle_device_get_region_io_fds(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg)
msg->out.nr_fds = 0;
if (req->argsz >= reply->argsz) {
- msg->out.fds = calloc(sizeof(int), max_sent_sub_regions);
+ msg->out.fds = calloc(sizeof(int),
+ max_sent_sub_regions + nr_shadow_reg);
if (msg->out.fds == NULL) {
return -1;
}
@@ -627,7 +639,13 @@ handle_device_get_region_io_fds(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg)
ioefd->size = sub_reg->size;
ioefd->fd_index = add_fd_index(msg->out.fds, &msg->out.nr_fds,
sub_reg->fd);
- ioefd->type = VFIO_USER_IO_FD_TYPE_IOEVENTFD;
+ if (sub_reg->shadow_fd == -1) {
+ ioefd->type = VFIO_USER_IO_FD_TYPE_IOEVENTFD;
+ } else {
+ ioefd->type = VFIO_USER_IO_FD_TYPE_IOEVENTFD_SHADOW;
+ int ret = add_fd_index(msg->out.fds, &msg->out.nr_fds, sub_reg->shadow_fd);
+ assert(ret == 1);
+ }
ioefd->flags = sub_reg->flags;
ioefd->datamatch = sub_reg->datamatch;
diff --git a/lib/private.h b/lib/private.h
index 7ffd6be..b875138 100644
--- a/lib/private.h
+++ b/lib/private.h
@@ -186,6 +186,7 @@ typedef struct ioeventfd {
int32_t fd;
uint32_t flags;
uint64_t datamatch;
+ int32_t shadow_fd;
LIST_ENTRY(ioeventfd) entry;
} ioeventfd_t;