From 6756ee83f8c86d0f844b2dec2cb034b379951e2d Mon Sep 17 00:00:00 2001 From: Thanos Makatos Date: Tue, 22 Nov 2022 15:51:10 +0000 Subject: shadow ioeventfd: add demo (#722) Signed-off-by: Thanos Makatos Reviewed-by: John Levon --- README.md | 8 ++ docs/ioregionfd.md | 31 ++++++ samples/meson.build | 18 ++++ samples/shadow_ioeventfd_server.c | 175 ++++++++++++++++++++++++++++++++++ samples/shadow_ioeventfd_speed_test.c | 97 +++++++++++++++++++ 5 files changed, 329 insertions(+) create mode 100644 samples/shadow_ioeventfd_server.c create mode 100644 samples/shadow_ioeventfd_speed_test.c diff --git a/README.md b/README.md index d61412c..abdd757 100644 --- a/README.md +++ b/README.md @@ -230,6 +230,14 @@ cat /sys/class/gpio/gpiochip480/base > /sys/class/gpio/export for ((i=0;i<12;i++)); do cat /sys/class/gpio/OUT0/value; done ``` +shadow_ioeventfd_server +----------------------- + +shadow_ioeventfd_server.c and shadow_ioeventfd_speed_test.c are used to +demonstrate the benefits of shadow ioeventfd, see +[ioregionfd](./docs/ioregionfd.md) for more information. + + Other usage notes ================= diff --git a/docs/ioregionfd.md b/docs/ioregionfd.md index c09b077..a0cb7d7 100644 --- a/docs/ioregionfd.md +++ b/docs/ioregionfd.md @@ -23,3 +23,34 @@ The list of patches: * kernel: https://gist.github.com/tmakatos/532afd092a8df2175120d3dbfcd719ef * QEMU: https://gist.github.com/tmakatos/57755d2a37a6d53c9ff392e7c34470f6 * SPDK: https://gist.github.com/tmakatos/f6c10fdaff59c9d629f94bd8e44a53bc + +shadow ioeventfd sample +----------------------- + +samples/shadow_ioeventfd_server.c implements a vfio-user server that allows a +part of its BAR0 to be accessed via a shadow ioeventfd. +shadow_ioeventfd_speed_test.c is run in the guest. It compares performance of +shadow ioeventfd vs. vfio-user messages by repeatedly writing to the part of +the BAR0 that is handled by shadow ioeventfd and to the part not handled by +shadow ioeventfd.
+ +To run the sample: +* Patch and build the kernel and QEMU using the above patches. +* Enable support for shadow ioeventfd in libvfio-user (set `shadow-ioeventfd` + to `true` in meson_options.txt) and then build libvfio-user. +* Run samples/shadow_ioeventfd_server, e.g. + ``` + build/samples/shadow_ioeventfd_server /var/run/cntrl + ``` +* Start the guest with `intel_iommu=off` in the kernel command line. +* Bind the device to VFIO: + ``` + modprobe vfio-iommu-type1 allow_unsafe_interrupts=1 + modprobe vfio-pci ids=4e58:0 + ``` +* Build and run the test app in the guest (it needs to be copied there first), + the BDF needs to be substituted accordingly: + ``` + gcc shadow_ioeventfd_speed_test.c + ./a.out 3 0000:00:03.0 + ``` diff --git a/samples/meson.build b/samples/meson.build index d1d435c..9807a77 100644 --- a/samples/meson.build +++ b/samples/meson.build @@ -92,3 +92,21 @@ lspci = executable( include_directories: lib_include_dir, install: false, ) + + +shadow_ioeventfd_server_sources = [ + 'shadow_ioeventfd_server.c', +] + +shadow_ioeventfd_server_deps = [ + libvfio_user_dep, +] + +shadow_ioeventfd_server = executable( + 'shadow_ioeventfd_server', + shadow_ioeventfd_server_sources, + c_args: common_cflags, + dependencies: shadow_ioeventfd_server_deps, + include_directories: lib_include_dir, + install: false, +) diff --git a/samples/shadow_ioeventfd_server.c b/samples/shadow_ioeventfd_server.c new file mode 100644 index 0000000..a737831 --- /dev/null +++ b/samples/shadow_ioeventfd_server.c @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2022, Nutanix Inc. All rights reserved. + * Author: Thanos Makatos + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Nutanix nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + */ + +/* + * shadow_ioeventfd_server.c: an example of how to use a shadow ioeventfd. + * There is no Linux kernel driver, use samples/shadow_ioeventfd_speed_test.c + * in the guest instead.
+ */
+
+/*
+ * NOTE(review): the header names on the #include lines below were stripped
+ * when this patch was extracted. The list is reconstructed from the symbols
+ * this file uses; confirm it against the original commit.
+ */
+#include <assert.h>
+#include <err.h>
+#include <errno.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/eventfd.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "libvfio-user.h"
+#include "common.h"
+
+/*
+ * The part of BAR0 that is backed by the shadow ioeventfd: the guest-side
+ * test (shadow_ioeventfd_speed_test.c) writes 4 bytes at offset 0 for the
+ * shadow path and 4 bytes at offset 8 for the regular vfio-user path.
+ */
+enum {
+    SHADOW_IOEVENTFD_OFFSET = 0,
+    SHADOW_IOEVENTFD_SIZE = 4
+};
+
+/* Log callback handed to vfu_setup_log(): prints each message to stderr. */
+static void
+_log(vfu_ctx_t *vfu_ctx UNUSED, int level UNUSED, char const *msg)
+{
+    fprintf(stderr, "%s\n", msg);
+}
+
+/*
+ * BAR0 access callback. The sample device keeps no register state, so every
+ * access is accepted as-is: the callback ignores its arguments and reports
+ * that all count bytes were handled.
+ */
+static ssize_t
+bar0_cb(vfu_ctx_t *vfu_ctx UNUSED, char * const buf UNUSED,
+        size_t count UNUSED, loff_t offset UNUSED,
+        const bool is_write UNUSED)
+{
+    return count;
+}
+
+/*
+ * poll(2) without a timeout that is restarted when a signal interrupts it.
+ * Returns what poll(2) returns; on -1 errno is never EINTR.
+ */
+static int
+poll_retry(struct pollfd *fds, nfds_t nfds)
+{
+    int ret;
+
+    do {
+        ret = poll(fds, nfds, -1);
+    } while (ret == -1 && errno == EINTR);
+
+    return ret;
+}
+
+/*
+ * Usage: shadow_ioeventfd_server SOCKET_PATH
+ *
+ * Creates a PCI device with a one-page BAR0, waits for a client to attach,
+ * registers a shadow ioeventfd on BAR0 and then serves the vfio-user context
+ * and the eventfd until the client disconnects (ENOTCONN).
+ *
+ * Returns 0 on client disconnect; any other failure terminates the process
+ * with EXIT_FAILURE via err(3)/errx(3).
+ */
+int
+main(int argc, char *argv[])
+{
+    int ret;
+    vfu_ctx_t *vfu_ctx;
+    struct pollfd fds[2]; /* one for vfu_ctx, one for shadow_ioeventfd */
+    int fd, bar0_fd;
+    long page_size;
+
+    if (argc != 2) {
+        errx(EXIT_FAILURE, "missing vfio-user socket path");
+    }
+
+    /* Queried once so the BAR0 region and its backing file cannot disagree. */
+    page_size = sysconf(_SC_PAGESIZE);
+    if (page_size == -1) {
+        err(EXIT_FAILURE, "failed to get page size");
+    }
+
+    vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, argv[1],
+                             LIBVFIO_USER_FLAG_ATTACH_NB, NULL,
+                             VFU_DEV_TYPE_PCI);
+    if (vfu_ctx == NULL) {
+        err(EXIT_FAILURE, "failed to initialize device emulation");
+    }
+
+    ret = vfu_setup_log(vfu_ctx, _log, LOG_ERR);
+    if (ret < 0) {
+        err(EXIT_FAILURE, "failed to setup log");
+    }
+
+    ret = vfu_pci_init(vfu_ctx, VFU_PCI_TYPE_CONVENTIONAL,
+                       PCI_HEADER_TYPE_NORMAL, 0);
+    if (ret < 0) {
+        err(EXIT_FAILURE, "vfu_pci_init() failed");
+    }
+
+    /* Vendor ID 0x4e58 matches "modprobe vfio-pci ids=4e58:0" in the docs. */
+    vfu_pci_set_id(vfu_ctx, 0x4e58, 0, 0x0, 0x0);
+
+    ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX,
+                           page_size, &bar0_cb,
+                           VFU_REGION_FLAG_RW | VFU_REGION_FLAG_MEM, NULL, 0,
+                           -1, 0);
+    if (ret < 0) {
+        err(EXIT_FAILURE, "failed to setup region");
+    }
+
+    ret = vfu_realize_ctx(vfu_ctx);
+    if (ret < 0) {
+        err(EXIT_FAILURE, "failed to realize device");
+    }
+
+    /*
+     * The context was created with LIBVFIO_USER_FLAG_ATTACH_NB, so wait for
+     * its poll fd to become ready before attaching. The result is checked
+     * explicitly rather than with assert() so that a poll(2) failure is
+     * still reported when built with NDEBUG.
+     */
+    fds[0] = (struct pollfd) {
+        .fd = vfu_get_poll_fd(vfu_ctx),
+        .events = POLLIN | POLLOUT
+    };
+    ret = poll_retry(fds, 1);
+    if (ret < 0) {
+        err(EXIT_FAILURE, "failed to poll(2)");
+    }
+    assert(ret == 1);
+    ret = vfu_attach_ctx(vfu_ctx);
+    if (ret < 0) {
+        err(EXIT_FAILURE, "failed to attach device");
+    }
+
+    fd = eventfd(0, 0);
+    if (fd == -1) {
+        err(EXIT_FAILURE, "failed to create eventfd");
+    }
+
+    /* One-page anonymous file passed below as the ioeventfd's shadow file. */
+    bar0_fd = syscall(SYS_memfd_create, "BAR0", 0);
+    if (bar0_fd == -1) {
+        err(EXIT_FAILURE, "failed to create BAR0 file");
+    }
+    ret = ftruncate(bar0_fd, page_size);
+    if (ret == -1) {
+        err(EXIT_FAILURE, "failed to truncate BAR0 file");
+    }
+
+    /* Arguments after the size are unchanged from the original sample. */
+    ret = vfu_create_ioeventfd(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX,
+                               fd, SHADOW_IOEVENTFD_OFFSET,
+                               SHADOW_IOEVENTFD_SIZE,
+                               0, false, bar0_fd, 0);
+    if (ret == -1) {
+        err(EXIT_FAILURE, "failed to create shadow ioeventfd");
+    }
+
+    fds[0] = (struct pollfd) {
+        .fd = vfu_get_poll_fd(vfu_ctx),
+        .events = POLLIN
+    };
+    fds[1] = (struct pollfd) {
+        .fd = fd,
+        .events = POLLIN
+    };
+
+    for (;;) {
+        ret = poll_retry(fds, 2);
+        if (ret < 0) {
+            err(EXIT_FAILURE, "failed to poll(2)");
+        }
+        assert(ret > 0);
+
+        if (fds[0].revents & POLLIN) {
+            ret = vfu_run_ctx(vfu_ctx);
+            if (ret < 0) {
+                if (errno == EAGAIN) {
+                    /* A pending eventfd is reported again by the next poll. */
+                    continue;
+                }
+                if (errno == ENOTCONN) {
+                    break;
+                }
+                err(EXIT_FAILURE, "vfu_run_ctx() failed");
+            }
+        }
+
+        if (fds[1].revents & POLLIN) {
+            eventfd_t value;
+
+            /* Drain the counter, otherwise the fd stays readable forever. */
+            if (eventfd_read(fd, &value) == -1) {
+                err(EXIT_FAILURE, "failed to read eventfd");
+            }
+            bar0_cb(vfu_ctx, NULL, SHADOW_IOEVENTFD_SIZE,
+                    SHADOW_IOEVENTFD_OFFSET, true);
+        }
+    }
+
+    return 0;
+}
+
+/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/samples/shadow_ioeventfd_speed_test.c b/samples/shadow_ioeventfd_speed_test.c
new file mode 100644
index 0000000..5fe036d
--- /dev/null
+++ b/samples/shadow_ioeventfd_speed_test.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2022, Nutanix Inc. All rights reserved.
+ * Author: Thanos Makatos
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Nutanix nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + */ + +/* + * shadow_ioeventfd_speed_test.c: application that is run in the guest to + * demonstrate the performance benefit of shadow ioeventfd. To be used with + * shadow_ioeventfd_server.c on the host.
+ */
+
+/*
+ * NOTE(review): the header names on the #include lines below were stripped
+ * when this patch was extracted. The list is reconstructed from the symbols
+ * this file uses; confirm it against the original commit.
+ */
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <linux/vfio.h>
+
+enum {
+    WRITE_COUNT = 100000, /* number of writes per measurement */
+    LEGACY_OFFSET = 8     /* BAR0 offset used for the non-shadow measurement */
+};
+
+/*
+ * Writes a 32-bit value to the device count times at the given file offset
+ * and returns the elapsed wall-clock time in microseconds. Exits the process
+ * if a write fails or is short.
+ */
+static unsigned long
+time_writes(int device, off_t offset, int count)
+{
+    const uint32_t val = 0xdeadbeef;
+    struct timeval t0, t1;
+
+    gettimeofday(&t0, NULL);
+    for (int i = 0; i < count; i++) {
+        ssize_t ret = pwrite(device, &val, sizeof val, offset);
+        if (ret == -1) {
+            err(EXIT_FAILURE, "failed to write to device");
+        }
+        if ((size_t)ret != sizeof val) {
+            errx(EXIT_FAILURE, "short write to device");
+        }
+    }
+    gettimeofday(&t1, NULL);
+
+    /* Cast so the value matches the %lu conversions used by the caller. */
+    return (unsigned long)((t1.tv_sec - t0.tv_sec) * 1000000L
+                           + (t1.tv_usec - t0.tv_usec));
+}
+
+/*
+ * Usage: a.out IOMMU_GROUP DEVICE_BDF (e.g. "./a.out 3 0000:00:03.0")
+ *
+ * Opens the device through VFIO and times WRITE_COUNT 4-byte writes to the
+ * start of region 0 ("shadow") and then to LEGACY_OFFSET bytes into it
+ * ("legacy"), printing both durations.
+ *
+ * Returns 0 on success; any failure terminates the process with
+ * EXIT_FAILURE via err(3)/errx(3).
+ */
+int
+main(int argc, char *argv[])
+{
+    /* argv[1] and argv[2] are dereferenced below, so both must be present. */
+    if (argc != 3) {
+        errx(EXIT_FAILURE,
+             "usage: shadow_ioeventfd_speed_test IOMMU_GROUP DEVICE_BDF");
+    }
+
+    char *end;
+    errno = 0;
+    long group_id = strtol(argv[1], &end, 10);
+    if (errno != 0 || end == argv[1] || *end != '\0' || group_id < 0) {
+        errx(EXIT_FAILURE, "invalid IOMMU group '%s'", argv[1]);
+    }
+
+    int container = open("/dev/vfio/vfio", O_RDWR);
+    if (container == -1) {
+        err(EXIT_FAILURE, "failed to open /dev/vfio/vfio");
+    }
+
+    char path[PATH_MAX];
+    snprintf(path, sizeof path, "/dev/vfio/%ld", group_id);
+    int group = open(path, O_RDWR);
+    /* open(2) reports failure with -1; 0 is a valid file descriptor. */
+    if (group == -1) {
+        err(EXIT_FAILURE, "failed to open %s", path);
+    }
+
+    struct vfio_group_status status = { .argsz = sizeof(status) };
+    if (ioctl(group, VFIO_GROUP_GET_STATUS, &status) == -1) {
+        err(EXIT_FAILURE, "failed to get status of group %ld", group_id);
+    }
+    if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
+        errx(EXIT_FAILURE, "group %ld is not viable", group_id);
+    }
+    if (ioctl(group, VFIO_GROUP_SET_CONTAINER, &container) == -1) {
+        err(EXIT_FAILURE, "failed to add group %ld to container", group_id);
+    }
+    if (ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU) != 0) {
+        err(EXIT_FAILURE, "failed to set IOMMU type");
+    }
+
+    int device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, argv[2]);
+    if (device < 0) {
+        err(EXIT_FAILURE, "failed to get device %s", argv[2]);
+    }
+
+    struct vfio_region_info region_info = {
+        .argsz = sizeof(region_info),
+        .index = VFIO_PCI_BAR0_REGION_INDEX
+    };
+    if (ioctl(device, VFIO_DEVICE_GET_REGION_INFO, &region_info) != 0) {
+        err(EXIT_FAILURE, "failed to get BAR0 region info");
+    }
+
+    printf("shadow:\t%lu us\n",
+           time_writes(device, (off_t)region_info.offset, WRITE_COUNT));
+    printf("legacy:\t%lu us\n",
+           time_writes(device, (off_t)(region_info.offset + LEGACY_OFFSET),
+                       WRITE_COUNT));
+    return 0;
+}
+
+/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */
-- cgit v1.1