aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThanos Makatos <thanos.makatos@nutanix.com>2022-11-22 15:51:10 +0000
committerGitHub <noreply@github.com>2022-11-22 15:51:10 +0000
commit6756ee83f8c86d0f844b2dec2cb034b379951e2d (patch)
treea7176ceb24815d140499189b23fbfcdccdc18227
parent360f6a6795a94d81b036c13a08595db944c1904c (diff)
downloadlibvfio-user-6756ee83f8c86d0f844b2dec2cb034b379951e2d.zip
libvfio-user-6756ee83f8c86d0f844b2dec2cb034b379951e2d.tar.gz
libvfio-user-6756ee83f8c86d0f844b2dec2cb034b379951e2d.tar.bz2
shadow ioeventfd: add demo (#722)
Signed-off-by: Thanos Makatos <thanos.makatos@nutanix.com> Reviewed-by: John Levon <john.levon@nutanix.com>
-rw-r--r--README.md8
-rw-r--r--docs/ioregionfd.md31
-rw-r--r--samples/meson.build18
-rw-r--r--samples/shadow_ioeventfd_server.c175
-rw-r--r--samples/shadow_ioeventfd_speed_test.c97
5 files changed, 329 insertions, 0 deletions
diff --git a/README.md b/README.md
index d61412c..abdd757 100644
--- a/README.md
+++ b/README.md
@@ -230,6 +230,14 @@ cat /sys/class/gpio/gpiochip480/base > /sys/class/gpio/export
for ((i=0;i<12;i++)); do cat /sys/class/gpio/OUT0/value; done
```
+shadow_ioeventfd_server
+-----------------------
+
+shadow_ioeventfd_server.c and shadow_ioeventfd_speed_test.c are used to
+demonstrate the benefits of shadow ioeventfd, see
+[ioregionfd](./docs/ioregionfd.md) for more information.
+
+
Other usage notes
=================
diff --git a/docs/ioregionfd.md b/docs/ioregionfd.md
index c09b077..a0cb7d7 100644
--- a/docs/ioregionfd.md
+++ b/docs/ioregionfd.md
@@ -23,3 +23,34 @@ The list of patches:
* kernel: https://gist.github.com/tmakatos/532afd092a8df2175120d3dbfcd719ef
* QEMU: https://gist.github.com/tmakatos/57755d2a37a6d53c9ff392e7c34470f6
* SPDK: https://gist.github.com/tmakatos/f6c10fdaff59c9d629f94bd8e44a53bc
+
+shadow ioeventfd sample
+-----------------------
+
+samples/shadow_ioeventfd_server.c implements a vfio-user server that allows a
+part of its BAR0 to be accessed via a shadow ioeventfd.
+shadow_ioeventfd_speed_test.c is run in the guest. It compares performance of
+shadow ioeventfd vs. vfio-user messages by repeatedly writing to the part of
+the BAR0 that is handled by shadow ioeventfd and to the part not handled by
+shadow ioeventfd.
+
+To run the sample:
+* Patch and build the kernel and QEMU using the above patches.
+* Enable support for shadow ioeventfd in libvfio-user (set `shadow-ioeventfd`
+ to `true` in meson_options.txt) and then build libvfio-user.
+* Run samples/shadow_ioeventfd_server, e.g.
+ ```
+ build/samples/shadow_ioeventfd_server /var/run/cntrl
+ ```
+* Start the guest with `intel_iommu=off` in the kernel command line.
+* Bind the device to VFIO:
+ ```
+ modprobe vfio-iommu-type1 allow_unsafe_interrupts=1
+ modprobe vfio-pci ids=4e58:0
+ ```
+* Build and run the test app in the guest (it needs to be copied there first).
+ The first argument is the VFIO group number and the second is the device's
+ BDF; both need to be substituted accordingly:
+ ```
+ gcc shadow_ioeventfd_speed_test.c
+ ./a.out 3 0000:00:03.0
+ ```
diff --git a/samples/meson.build b/samples/meson.build
index d1d435c..9807a77 100644
--- a/samples/meson.build
+++ b/samples/meson.build
@@ -92,3 +92,21 @@ lspci = executable(
include_directories: lib_include_dir,
install: false,
)
+
+
+# Sample vfio-user server demonstrating shadow ioeventfd. It is built along
+# with the other samples but never installed (install: false).
+shadow_ioeventfd_server_sources = [
+ 'shadow_ioeventfd_server.c',
+]
+
+# The sample only depends on libvfio-user itself.
+shadow_ioeventfd_server_deps = [
+ libvfio_user_dep,
+]
+
+shadow_ioeventfd_server = executable(
+ 'shadow_ioeventfd_server',
+ shadow_ioeventfd_server_sources,
+ c_args: common_cflags,
+ dependencies: shadow_ioeventfd_server_deps,
+ include_directories: lib_include_dir,
+ install: false,
+)
diff --git a/samples/shadow_ioeventfd_server.c b/samples/shadow_ioeventfd_server.c
new file mode 100644
index 0000000..a737831
--- /dev/null
+++ b/samples/shadow_ioeventfd_server.c
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2022, Nutanix Inc. All rights reserved.
+ * Author: Thanos Makatos <thanos@nutanix.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Nutanix nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ */
+
+/*
+ * shadow_ioeventfd_server.c: an example of how to use a shadow ioeventfd.
+ * There is no Linux kernel driver, use samples/shadow_ioeventfd_speed_test.c
+ * in the guest instead.
+ */
+
+#include <stdio.h>
+#include <err.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/poll.h>
+#include <sys/eventfd.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+
+#include "libvfio-user.h"
+#include "common.h"
+
+/*
+ * Log callback registered through vfu_setup_log(): prints the message on a
+ * line of its own on stderr. The context and the level are not consulted.
+ */
+static void
+_log(vfu_ctx_t *vfu_ctx UNUSED, int level UNUSED, const char *msg)
+{
+    fprintf(stderr, "%s\n", msg);
+}
+
+/*
+ * BAR0 access callback. This sample keeps no register state: the buffer, the
+ * offset and the access direction are ignored and every access is reported
+ * as fully handled.
+ *
+ * Returns count, i.e. the number of bytes the caller asked for.
+ */
+static ssize_t
+bar0_cb(vfu_ctx_t *vfu_ctx UNUSED, char * const buf UNUSED,
+        size_t count, loff_t offset UNUSED,
+        const bool is_write UNUSED)
+{
+    /* count is the only argument that is actually used. */
+    return (ssize_t)count;
+}
+
+/*
+ * Entry point of the shadow ioeventfd sample server.
+ *
+ * Usage: shadow_ioeventfd_server <vfio-user socket path>
+ *
+ * Creates a PCI device with a one-page BAR0, waits for a client to connect,
+ * registers an ioeventfd (with a memfd as its shadow file) for the first 4
+ * bytes of BAR0 and then serves both the vfio-user socket and the eventfd
+ * until the client disconnects.
+ *
+ * Returns 0 once the client has disconnected; any failure terminates the
+ * process with EXIT_FAILURE through err()/errx().
+ */
+int
+main(int argc, char *argv[])
+{
+    /* Part of BAR0 that is covered by the ioeventfd. */
+    enum { SHADOW_OFFSET = 0, SHADOW_SIZE = 4 };
+
+    int ret;
+    vfu_ctx_t *vfu_ctx;
+    struct pollfd fds[2]; /* one for vfu_ctx, one for shadow_ioeventfd */
+    int fd, bar0_fd;
+    long page_size;
+
+    if (argc != 2) {
+        errx(EXIT_FAILURE, "missing vfio-user socket path");
+    }
+
+    /* Queried once: it sizes both BAR0 and the file backing the shadow. */
+    page_size = sysconf(_SC_PAGESIZE);
+    if (page_size <= 0) {
+        err(EXIT_FAILURE, "failed to get page size");
+    }
+
+    vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, argv[1],
+                             LIBVFIO_USER_FLAG_ATTACH_NB, NULL,
+                             VFU_DEV_TYPE_PCI);
+
+    if (vfu_ctx == NULL) {
+        err(EXIT_FAILURE, "failed to initialize device emulation");
+    }
+
+    ret = vfu_setup_log(vfu_ctx, _log, LOG_ERR);
+    if (ret < 0) {
+        err(EXIT_FAILURE, "failed to setup log");
+    }
+
+    ret = vfu_pci_init(vfu_ctx, VFU_PCI_TYPE_CONVENTIONAL,
+                       PCI_HEADER_TYPE_NORMAL, 0);
+    if (ret < 0) {
+        err(EXIT_FAILURE, "vfu_pci_init() failed");
+    }
+
+    vfu_pci_set_id(vfu_ctx, 0x4e58, 0, 0x0, 0x0);
+
+    ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX,
+                           page_size, &bar0_cb,
+                           VFU_REGION_FLAG_RW | VFU_REGION_FLAG_MEM, NULL, 0,
+                           -1, 0);
+    if (ret < 0) {
+        err(EXIT_FAILURE, "failed to setup region");
+    }
+
+    ret = vfu_realize_ctx(vfu_ctx);
+    if (ret < 0) {
+        err(EXIT_FAILURE, "failed to realize device");
+    }
+
+    /*
+     * The context was created with LIBVFIO_USER_FLAG_ATTACH_NB, so wait for
+     * activity on its poll fd before attaching. A real error check is used
+     * instead of assert() so that it survives NDEBUG builds, and an
+     * interrupted poll() is simply retried.
+     */
+    fds[0] = (struct pollfd) {
+        .fd = vfu_get_poll_fd(vfu_ctx),
+        .events = POLLIN | POLLOUT
+    };
+    do {
+        ret = poll(fds, 1, -1);
+    } while (ret == -1 && errno == EINTR);
+    if (ret < 0) {
+        err(EXIT_FAILURE, "failed to poll(2)");
+    }
+    ret = vfu_attach_ctx(vfu_ctx);
+    if (ret < 0) {
+        err(EXIT_FAILURE, "failed to attach device");
+    }
+
+    fd = eventfd(0, 0);
+    if (fd == -1) {
+        err(EXIT_FAILURE, "failed to create eventfd");
+    }
+    /* Anonymous, page-sized file that is handed over as the shadow fd. */
+    bar0_fd = syscall(SYS_memfd_create, "BAR0", 0);
+    if (bar0_fd == -1) {
+        err(EXIT_FAILURE, "failed to create BAR0 file");
+    }
+    ret = ftruncate(bar0_fd, page_size);
+    if (ret == -1) {
+        err(EXIT_FAILURE, "failed to truncate BAR0 file");
+    }
+    ret = vfu_create_ioeventfd(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX,
+                               fd, SHADOW_OFFSET, SHADOW_SIZE,
+                               0, false, bar0_fd, 0);
+    if (ret == -1) {
+        err(EXIT_FAILURE, "failed to create shadow ioeventfd");
+    }
+
+    fds[0] = (struct pollfd) {
+        .fd = vfu_get_poll_fd(vfu_ctx),
+        .events = POLLIN
+    };
+    fds[1] = (struct pollfd) {
+        .fd = fd,
+        .events = POLLIN
+    };
+
+    for (;;) {
+        ret = poll(fds, 2, -1);
+        if (ret < 0) {
+            if (errno == EINTR) {
+                continue;
+            }
+            err(EXIT_FAILURE, "failed to poll(2)");
+        }
+
+        /*
+         * POLLHUP/POLLERR are reported by poll() even though they were not
+         * requested. Let vfu_run_ctx() look at the socket in that case too,
+         * otherwise the loop would spin without ever making progress.
+         */
+        if (fds[0].revents & (POLLIN | POLLHUP | POLLERR)) {
+            ret = vfu_run_ctx(vfu_ctx);
+            if (ret < 0) {
+                if (errno == ENOTCONN) {
+                    /* Client went away: normal termination. */
+                    break;
+                }
+                if (errno != EAGAIN) {
+                    err(EXIT_FAILURE, "vfu_run_ctx() failed");
+                }
+                /* EAGAIN: nothing to do now, still service the eventfd. */
+            }
+        }
+
+        if (fds[1].revents & POLLIN) {
+            eventfd_t value;
+
+            /* Drain the counter so that poll() does not fire again. */
+            if (eventfd_read(fd, &value) == -1) {
+                err(EXIT_FAILURE, "failed to read eventfd");
+            }
+            bar0_cb(vfu_ctx, NULL, SHADOW_SIZE, SHADOW_OFFSET, true);
+        }
+    }
+
+    vfu_destroy_ctx(vfu_ctx);
+    return 0;
+}
+
+/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/samples/shadow_ioeventfd_speed_test.c b/samples/shadow_ioeventfd_speed_test.c
new file mode 100644
index 0000000..5fe036d
--- /dev/null
+++ b/samples/shadow_ioeventfd_speed_test.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2022, Nutanix Inc. All rights reserved.
+ * Author: Thanos Makatos <thanos@nutanix.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Nutanix nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ */
+
+/*
+ * shadow_ioeventfd_speed_test.c: application that is run in the guest to
+ * demonstrate the performance benefit of shadow ioeventfd. To be used with
+ * shadow_ioeventfd_server.c on the host.
+ */
+
+#include <stdio.h>
+#include <linux/types.h>
+#include <linux/ioctl.h>
+#include <linux/vfio.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <linux/limits.h>
+#include <stdlib.h>
+#include <err.h>
+#include <unistd.h>
+#include <sys/time.h>
+
+/* Number of 4-byte writes issued per measurement. */
+enum { WRITE_COUNT = 100000 };
+
+/*
+ * Offset within BAR0 used for the second measurement. The server sample
+ * registers its ioeventfd for 4 bytes at offset 0, so this offset lies
+ * outside of that range.
+ */
+enum { LEGACY_OFFSET = 8 };
+
+/* Returns the time elapsed between *start and *end in microseconds. */
+static long elapsed_us(const struct timeval *start, const struct timeval *end)
+{
+    return (long)(end->tv_sec - start->tv_sec) * 1000000L +
+           (long)(end->tv_usec - start->tv_usec);
+}
+
+/*
+ * Writes a 32-bit value WRITE_COUNT times at the given offset of the device
+ * fd and returns how long that took in microseconds. A failed or short write
+ * terminates the process.
+ */
+static long time_writes(int device, off_t offset)
+{
+    const u_int32_t val = 0xdeadbeef;
+    struct timeval t0, t1;
+
+    gettimeofday(&t0, NULL);
+    for (int i = 0; i < WRITE_COUNT; i++) {
+        ssize_t n = pwrite(device, &val, sizeof val, offset);
+        if (n == -1) {
+            err(EXIT_FAILURE, "failed to write to device");
+        }
+        if (n != (ssize_t)sizeof val) {
+            errx(EXIT_FAILURE, "short write to device");
+        }
+    }
+    gettimeofday(&t1, NULL);
+    return elapsed_us(&t0, &t1);
+}
+
+/*
+ * Usage: <program> <VFIO group number> <device BDF>
+ *
+ * Opens the given VFIO group and device, then times WRITE_COUNT writes to
+ * the start of region 0 ("shadow") and WRITE_COUNT writes to offset
+ * LEGACY_OFFSET of the same region ("legacy"), printing both durations.
+ *
+ * Returns 0 on success; any failure terminates the process with
+ * EXIT_FAILURE through err()/errx().
+ */
+int main(int argc, char *argv[])
+{
+    if (argc != 3) {
+        errx(EXIT_FAILURE, "usage: %s <VFIO group number> <device BDF>",
+             argc > 0 ? argv[0] : "shadow_ioeventfd_speed_test");
+    }
+
+    /* Unlike atoi(), strtol() lets us reject malformed group numbers. */
+    char *end;
+    long group_id = strtol(argv[1], &end, 10);
+    if (end == argv[1] || *end != '\0' || group_id < 0) {
+        errx(EXIT_FAILURE, "invalid VFIO group number '%s'", argv[1]);
+    }
+
+    int container = open("/dev/vfio/vfio", O_RDWR);
+    if (container == -1) {
+        err(EXIT_FAILURE, "failed to open /dev/vfio/vfio");
+    }
+
+    char path[PATH_MAX];
+    int len = snprintf(path, sizeof path, "/dev/vfio/%ld", group_id);
+    if (len < 0 || (size_t)len >= sizeof path) {
+        errx(EXIT_FAILURE, "VFIO group path too long");
+    }
+    /* open() reports failure with -1, not 0. */
+    int group = open(path, O_RDWR);
+    if (group == -1) {
+        err(EXIT_FAILURE, "failed to open %s", path);
+    }
+
+    struct vfio_group_status status = { .argsz = sizeof(status) };
+    if (ioctl(group, VFIO_GROUP_GET_STATUS, &status) == -1) {
+        err(EXIT_FAILURE, "failed to get status of %s", path);
+    }
+    if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
+        errx(EXIT_FAILURE, "%s is not viable", path);
+    }
+    if (ioctl(group, VFIO_GROUP_SET_CONTAINER, &container) == -1) {
+        err(EXIT_FAILURE, "failed to add %s to the container", path);
+    }
+    if (ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU) != 0) {
+        err(EXIT_FAILURE, "failed to set IOMMU type");
+    }
+
+    int device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, argv[2]);
+    if (device < 0) {
+        err(EXIT_FAILURE, "failed to get device fd for %s", argv[2]);
+    }
+
+    struct vfio_region_info region_info = {
+        .argsz = sizeof(region_info),
+        .index = VFIO_PCI_BAR0_REGION_INDEX
+    };
+    if (ioctl(device, VFIO_DEVICE_GET_REGION_INFO, &region_info) != 0) {
+        err(EXIT_FAILURE, "failed to get region info");
+    }
+
+    /* The elapsed time is a signed long, hence %ld rather than %lu. */
+    printf("shadow:\t%ld us\n",
+           time_writes(device, (off_t)region_info.offset));
+    printf("legacy:\t%ld us\n",
+           time_writes(device, (off_t)region_info.offset + LEGACY_OFFSET));
+    return 0;
+}
+
+/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */