aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Thanos Makatos <thanos.makatos@nutanix.com> 2020-03-24 12:06:26 -0400
committer Thanos Makatos <thanos.makatos@nutanix.com> 2020-03-25 10:36:29 -0400
commit 8435c007567fdd0a92ed1f4e8dd7b60bb09ae116 (patch)
tree 67fcfe389226d546bbe67bac7bf32f3cadfdd922
parent 9a8dddb7ed5c5d4ac0bc1ff89af795e5fe312c86 (diff)
download libvfio-user-8435c007567fdd0a92ed1f4e8dd7b60bb09ae116.zip
libvfio-user-8435c007567fdd0a92ed1f4e8dd7b60bb09ae116.tar.gz
libvfio-user-8435c007567fdd0a92ed1f4e8dd7b60bb09ae116.tar.bz2
introduce vfio-over-socket transport
Signed-off-by: Thanos Makatos <thanos.makatos@nutanix.com>
-rw-r--r-- .gitmodules 3
-rw-r--r-- CMakeLists.txt 5
-rw-r--r-- kmod/muser.c 33
-rw-r--r-- kmod/muser.h 94
-rw-r--r-- lib/libmuser.c 464
-rw-r--r-- lib/muser.h 22
m--------- libpathtrap 0
-rw-r--r-- libvfio/CMakeLists.txt 35
-rw-r--r-- libvfio/libvfio.c 513
-rw-r--r-- samples/gpio-pci-idio-16.c 32
10 files changed, 1108 insertions, 93 deletions
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..6848f53
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "libpathtrap"]
+ path = libpathtrap
+ url = git@github.com:tmakatos/libpathtrap.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7975983..8ec6e5c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -32,11 +32,14 @@ cmake_minimum_required (VERSION 2.6)
project(muser)
include(GNUInstallDirs)
-# shared library
+# shared libraries
add_subdirectory(lib)
+add_subdirectory(libpathtrap)
+add_subdirectory(libvfio)
# kernel module
add_subdirectory(kmod)
# samples
add_subdirectory(samples)
+
diff --git a/kmod/muser.c b/kmod/muser.c
index 9e5a2c8..53cc3d8 100644
--- a/kmod/muser.c
+++ b/kmod/muser.c
@@ -1046,37 +1046,6 @@ err:
return ret;
}
-static ssize_t get_minsz(unsigned int cmd)
-{
- switch (cmd) {
- case VFIO_DEVICE_GET_INFO:
- return offsetofend(struct vfio_device_info, num_irqs);
- case VFIO_DEVICE_GET_REGION_INFO:
- return offsetofend(struct vfio_region_info, offset);
- case VFIO_DEVICE_GET_IRQ_INFO:
- return offsetofend(struct vfio_irq_info, count);
- case VFIO_DEVICE_SET_IRQS:
- return offsetofend(struct vfio_irq_set, count);
- }
- return -EOPNOTSUPP;
-}
-
-static ssize_t get_argsz(unsigned int cmd, struct mudev_cmd *mucmd)
-{
- switch (cmd) {
- case VFIO_DEVICE_GET_INFO:
- return mucmd->muser_cmd.ioctl.data.dev_info.argsz;
- case VFIO_DEVICE_GET_REGION_INFO:
- return mucmd->muser_cmd.ioctl.data.reg_info.argsz;
- case VFIO_DEVICE_GET_IRQ_INFO:
- return mucmd->muser_cmd.ioctl.data.irq_info.argsz;
- case VFIO_DEVICE_SET_IRQS:
- return mucmd->muser_cmd.ioctl.data.irq_set.argsz;
- }
-
- return -EOPNOTSUPP;
-}
-
static int muser_ioctl_setup_cmd(struct mudev_cmd *mucmd, unsigned int cmd,
unsigned long arg)
{
@@ -1095,7 +1064,7 @@ static int muser_ioctl_setup_cmd(struct mudev_cmd *mucmd, unsigned int cmd,
return err;
/* Fetch argsz provided by caller. */
- argsz = get_argsz(cmd, mucmd);
+ argsz = get_argsz(cmd, &mucmd->muser_cmd);
if (argsz < 0)
return argsz;
diff --git a/kmod/muser.h b/kmod/muser.h
index 65841a4..9791736 100644
--- a/kmod/muser.h
+++ b/kmod/muser.h
@@ -13,6 +13,14 @@
#ifndef __KERNEL__
#include <sys/types.h>
+#include <stddef.h>
+#include <errno.h>
+
+/* FIXME copied from include/linux/stddef.h, is this OK license-wise? */
+#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
+#define offsetofend(TYPE, MEMBER) \
+ (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER))
+
#endif
#include <linux/ioctl.h>
@@ -41,6 +49,15 @@ struct muser_cmd_ioctl {
struct vfio_region_info reg_info;
struct vfio_irq_info irq_info;
struct vfio_irq_set irq_set;
+ struct vfio_group_status group_status;
+ int vfio_api_version;
+ int vfio_extension;
+ int container_fd;
+ int device_fd;
+ int iommu_type;
+ struct vfio_iommu_type1_info iommu_type1_info;
+ struct vfio_iommu_type1_dma_map dma_map;
+ struct vfio_iommu_type1_dma_unmap dma_unmap;
} data;
};
@@ -70,4 +87,81 @@ struct muser_cmd {
#define MUSER_DEV_CMD_WAIT _IOR('M', 1, struct muser_cmd)
#define MUSER_DEV_CMD_DONE _IOW('M', 2, struct muser_cmd)
+/*
+ * Returns the minimum argument size, in bytes, associated with a VFIO ioctl
+ * command.
+ *
+ * For commands whose argument is a VFIO structure this is the offset just
+ * past the structure member named in the corresponding case below. For
+ * VFIO_CHECK_EXTENSION, VFIO_GROUP_SET_CONTAINER, VFIO_GROUP_UNSET_CONTAINER
+ * and VFIO_SET_IOMMU it is sizeof(int). For VFIO_GET_API_VERSION,
+ * VFIO_GROUP_GET_DEVICE_FD and VFIO_DEVICE_RESET it is 0.
+ *
+ * Returns -EOPNOTSUPP for any command not listed.
+ */
+static inline ssize_t get_minsz(unsigned int cmd)
+{
+ switch (cmd) {
+ case VFIO_DEVICE_GET_INFO:
+ return offsetofend(struct vfio_device_info, num_irqs);
+ case VFIO_DEVICE_GET_REGION_INFO:
+ return offsetofend(struct vfio_region_info, offset);
+ case VFIO_DEVICE_GET_IRQ_INFO:
+ return offsetofend(struct vfio_irq_info, count);
+ case VFIO_DEVICE_SET_IRQS:
+ return offsetofend(struct vfio_irq_set, count);
+ case VFIO_GROUP_GET_STATUS:
+ return offsetofend(struct vfio_group_status, flags);
+ case VFIO_GET_API_VERSION:
+ return 0;
+ case VFIO_CHECK_EXTENSION:
+ case VFIO_GROUP_SET_CONTAINER:
+ case VFIO_GROUP_UNSET_CONTAINER:
+ case VFIO_SET_IOMMU:
+ return sizeof(int);
+ case VFIO_IOMMU_GET_INFO:
+ return offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);
+ case VFIO_IOMMU_MAP_DMA:
+ return offsetofend(struct vfio_iommu_type1_dma_map, size);
+ case VFIO_IOMMU_UNMAP_DMA:
+ return offsetofend(struct vfio_iommu_type1_dma_unmap, size);
+ case VFIO_GROUP_GET_DEVICE_FD:
+ case VFIO_DEVICE_RESET:
+ return 0;
+ }
+ return -EOPNOTSUPP;
+}
+
+/*
+ * Returns the argsz field the caller stored in the ioctl payload of
+ * muser_cmd for the given VFIO command.
+ *
+ * Only the four VFIO_DEVICE_* commands below are handled; every other
+ * command, including the group/container/IOMMU commands that get_minsz()
+ * knows about, yields -EOPNOTSUPP.
+ */
+static inline ssize_t get_argsz(unsigned int cmd, struct muser_cmd *muser_cmd)
+{
+ switch (cmd) {
+ case VFIO_DEVICE_GET_INFO:
+ return muser_cmd->ioctl.data.dev_info.argsz;
+ case VFIO_DEVICE_GET_REGION_INFO:
+ return muser_cmd->ioctl.data.reg_info.argsz;
+ case VFIO_DEVICE_GET_IRQ_INFO:
+ return muser_cmd->ioctl.data.irq_info.argsz;
+ case VFIO_DEVICE_SET_IRQS:
+ return muser_cmd->ioctl.data.irq_set.argsz;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+/*
+ * Maps a VFIO ioctl command number to a constant, human-readable name.
+ *
+ * Some entries list several names separated by '/'; presumably those ioctls
+ * share the same command number, so the number alone cannot tell them apart
+ * (NOTE(review): confirm against linux/vfio.h).
+ *
+ * Returns NULL for a command that is not listed, so callers must not pass the
+ * result straight to a "%s" format without checking it.
+ */
+static inline const char* vfio_cmd_to_str(int cmd) {
+ switch (cmd) {
+ case VFIO_GET_API_VERSION: return "VFIO_GET_API_VERSION";
+ case VFIO_CHECK_EXTENSION: return "VFIO_CHECK_EXTENSION";
+ case VFIO_SET_IOMMU: return "VFIO_SET_IOMMU";
+ case VFIO_GROUP_GET_STATUS: return "VFIO_GROUP_GET_STATUS";
+ case VFIO_GROUP_SET_CONTAINER: return "VFIO_GROUP_SET_CONTAINER";
+ case VFIO_GROUP_UNSET_CONTAINER: return "VFIO_GROUP_UNSET_CONTAINER";
+ case VFIO_GROUP_GET_DEVICE_FD: return "VFIO_GROUP_GET_DEVICE_FD";
+ case VFIO_DEVICE_GET_INFO: return "VFIO_DEVICE_GET_INFO";
+ case VFIO_DEVICE_GET_REGION_INFO: return "VFIO_DEVICE_GET_REGION_INFO";
+ case VFIO_DEVICE_GET_IRQ_INFO: return "VFIO_DEVICE_GET_IRQ_INFO";
+ case VFIO_DEVICE_SET_IRQS: return "VFIO_DEVICE_SET_IRQS";
+ case VFIO_DEVICE_RESET: return "VFIO_DEVICE_RESET";
+ case VFIO_IOMMU_GET_INFO: return "VFIO_IOMMU_GET_INFO/VFIO_DEVICE_GET_PCI_HOT_RESET_INFO/VFIO_IOMMU_SPAPR_TCE_GET_INFO";
+ case VFIO_IOMMU_MAP_DMA: return "VFIO_IOMMU_MAP_DMA/VFIO_DEVICE_PCI_HOT_RESET";
+ case VFIO_IOMMU_UNMAP_DMA: return "VFIO_IOMMU_UNMAP_DMA";
+ case VFIO_IOMMU_ENABLE: return "VFIO_IOMMU_ENABLE";
+ case VFIO_IOMMU_DISABLE: return "VFIO_IOMMU_DISABLE";
+ case VFIO_EEH_PE_OP: return "VFIO_EEH_PE_OP";
+ case VFIO_IOMMU_SPAPR_REGISTER_MEMORY: return "VFIO_IOMMU_SPAPR_REGISTER_MEMORY";
+ case VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY: return "VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY";
+ case VFIO_IOMMU_SPAPR_TCE_CREATE: return "VFIO_IOMMU_SPAPR_TCE_CREATE";
+ case VFIO_IOMMU_SPAPR_TCE_REMOVE: return "VFIO_IOMMU_SPAPR_TCE_REMOVE";
+ }
+ return NULL;
+}
+
#endif /* _UAPI_LINUX_MUSER_H */
diff --git a/lib/libmuser.c b/lib/libmuser.c
index e72efba..ff46ed9 100644
--- a/lib/libmuser.c
+++ b/lib/libmuser.c
@@ -47,6 +47,11 @@
#include <stdarg.h>
#include <linux/vfio.h>
#include <sys/param.h>
+#include <sys/un.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <time.h>
#include "../kmod/muser.h"
#include "muser.h"
@@ -54,6 +59,8 @@
#include "dma.h"
#include "cap.h"
+#define IOMMU_GRP_NAME "iommu_group"
+
typedef enum {
IRQ_NONE = 0,
IRQ_INTX,
@@ -77,6 +84,160 @@ typedef struct {
_Static_assert(sizeof(s) - offsetof(s, m) == sizeof(t), \
#t " " #m " must be last member in " #s)
+/*
+ * Per-device library context.
+ *
+ * The definition sits ahead of the transport helpers because open_sock()
+ * stores the IOMMU group directory path and its descriptor in it. Those two
+ * members are only used by the LM_TRANS_SOCK transport.
+ *
+ * irqs must stay the last member; MUST_BE_LAST() below checks this at compile
+ * time.
+ */
+struct lm_ctx {
+ void *pvt;
+ dma_controller_t *dma;
+ int fd;
+ int (*reset) (void *pvt);
+ lm_log_lvl_t log_lvl;
+ lm_log_fn_t *log;
+ lm_pci_info_t pci_info;
+ lm_pci_config_space_t *pci_config_space;
+ lm_trans_t trans;
+ struct caps *caps;
+
+ /* LM_TRANS_SOCK */
+ char *iommu_dir;
+ int iommu_dir_fd;
+
+ lm_irqs_t irqs; /* XXX must be last */
+} __attribute__((packed)); /* FIXME packed required to make below macro work */
+MUST_BE_LAST(struct lm_ctx, irqs, lm_irqs_t);
+
+
+/* function prototypes */
+static int
+muser_dma_map(lm_ctx_t*, struct muser_cmd*);
+
+static int
+muser_dma_unmap(lm_ctx_t*, struct muser_cmd*);
+
+static void
+free_sparse_mmap_areas(lm_reg_info_t*);
+
+/*
+ * Detach callback of the kernel transport: closes the device descriptor and
+ * returns the result of close(2).
+ */
+static int
+dev_detach(int dev_fd)
+{
+ return close(dev_fd);
+}
+
+/*
+ * Attach callback of the kernel transport: opens "/dev/<MUSER_DEVNODE>/<uuid>"
+ * read-write.
+ *
+ * lm_ctx is unused; the parameter exists so the function matches the attach
+ * signature in struct transport_ops.
+ *
+ * Returns the open file descriptor, or -1 if the path could not be built or
+ * open(2) failed.
+ */
+static int
+dev_attach(lm_ctx_t *lm_ctx __attribute__((unused)), const char *uuid)
+{
+ char *path;
+ int dev_fd;
+ int err;
+
+ err = asprintf(&path, "/dev/" MUSER_DEVNODE "/%s", uuid);
+ /* The constant 6 is the length of "/dev/" plus the '/' before the UUID. */
+ if (err != (int)(strlen(MUSER_DEVNODE) + strlen(uuid) + 6)) {
+ return -1;
+ }
+
+ dev_fd = open(path, O_RDWR);
+
+ free(path);
+
+ return dev_fd;
+}
+
+/*
+ * get_request callback of the kernel transport: issues MUSER_DEV_CMD_WAIT on
+ * the muser device so the next command is stored in *cmd.
+ *
+ * cmd already points at the caller's struct muser_cmd, so it is passed to
+ * ioctl(2) as is. Passing &cmd would hand over the address of this function's
+ * own pointer variable instead of the command buffer.
+ *
+ * Return value follows the convention drive_loop() applies to every
+ * transport: negative on error (errno set by ioctl(2)), positive on success.
+ * drive_loop() treats 0 as end-of-file, which only makes sense for the socket
+ * transport, so a successful ioctl(2) that returns 0 is reported as
+ * sizeof *cmd, the same value the socket transport returns for a complete
+ * command.
+ */
+static int
+get_request_kernel(int fd, struct muser_cmd *cmd)
+{
+    int ret = ioctl(fd, MUSER_DEV_CMD_WAIT, cmd);
+
+    if (ret < 0) {
+        return ret;
+    }
+    return ret == 0 ? (int)sizeof *cmd : ret;
+}
+
+/*
+ * send_response callback of the kernel transport: completes the command in
+ * *cmd by issuing MUSER_DEV_CMD_DONE on the muser device.
+ *
+ * cmd already points at the caller's struct muser_cmd, so it is passed to
+ * ioctl(2) as is. Passing &cmd would hand over the address of this function's
+ * own pointer variable instead of the command buffer.
+ *
+ * Returns the result of ioctl(2): negative on error with errno set.
+ */
+static int
+send_response_kernel(int fd, struct muser_cmd *cmd)
+{
+    return ioctl(fd, MUSER_DEV_CMD_DONE, cmd);
+}
+
+/*
+ * Attach callback of the socket transport.
+ *
+ * uuid must be the decimal IOMMU group number. The function creates
+ * VFIO_DIR (if missing) and VFIO_DIR<group>, places an IOMMU_GRP_NAME symlink
+ * and the MUSER_SOCK UNIX socket inside that directory, then blocks in
+ * accept(2) until a client connects.
+ *
+ * On success the connected socket is returned and lm_ctx->iommu_dir /
+ * lm_ctx->iommu_dir_fd describe the group directory; lm_ctx_destroy() removes
+ * it later. The listening socket is closed once the client is connected
+ * because nothing else would ever accept on it.
+ *
+ * On failure -1 is returned with errno set. Everything this call created
+ * (descriptors, memory, and the filesystem entries inside the group
+ * directory) is released again so that a retry, e.g. after EINTR, does not
+ * fail because the group directory already exists.
+ */
+static int
+open_sock(lm_ctx_t *lm_ctx, const char *uuid)
+{
+    struct sockaddr_un addr = { .sun_family = AF_UNIX };
+    int ret, fd, conn_fd, saved_errno;
+    int dir_created = 0;
+    unsigned long iommu_grp;
+    char *endptr;
+
+    assert(lm_ctx != NULL);
+    assert(uuid != NULL);
+
+    /* Put the context in a known state for the error path below. */
+    lm_ctx->iommu_dir = NULL;
+    lm_ctx->iommu_dir_fd = -1;
+
+    /*
+     * FIXME simplify by creating everything under a temporary directory and
+     * then atomically rename
+     */
+
+    /* strtoul only reports overflow through errno, so clear it first. */
+    errno = 0;
+    iommu_grp = strtoul(uuid, &endptr, 10);
+    if (endptr == uuid || *endptr != '\0' || errno == ERANGE) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
+        return -1;
+    }
+
+    /* create /dev/vfio */
+    if (mkdir(VFIO_DIR, 0755) == -1 && errno != EEXIST) {
+        goto err;
+    }
+
+    /* create /dev/vfio/<IOMMU group> */
+    if (asprintf(&lm_ctx->iommu_dir, VFIO_DIR "%lu", iommu_grp) == -1) {
+        /* the pointer is undefined after a failed asprintf */
+        lm_ctx->iommu_dir = NULL;
+        goto err;
+    }
+    if (mkdir(lm_ctx->iommu_dir, 0755) == -1) {
+        goto err;
+    }
+    /* From here on the directory is ours and may be removed on failure. */
+    dir_created = 1;
+
+    if ((lm_ctx->iommu_dir_fd = open(lm_ctx->iommu_dir, O_DIRECTORY)) == -1) {
+        goto err;
+    }
+
+    /* create symlink /dev/vfio/<IOMMU group>/iommu_group -> /dev/vfio/<IOMMU group> */
+    if (symlinkat(lm_ctx->iommu_dir, lm_ctx->iommu_dir_fd, IOMMU_GRP_NAME) == -1) {
+        goto err;
+    }
+
+    /*
+     * create control socket
+     *
+     * O_CREAT requires an explicit mode argument. The descriptor is not
+     * needed: the placeholder file is unlinked again right before bind().
+     */
+    if ((ret = openat(lm_ctx->iommu_dir_fd, MUSER_SOCK, O_WRONLY | O_CREAT, 0600)) == -1) {
+        goto err;
+    }
+    close(ret);
+
+    ret = snprintf(addr.sun_path, sizeof addr.sun_path, "%s/" MUSER_SOCK, lm_ctx->iommu_dir);
+    if (ret < 0) {
+        goto err;
+    }
+    if (ret >= (int)sizeof addr.sun_path) {
+        errno = ENAMETOOLONG;
+        goto err;
+    }
+
+    /* start listening business */
+    if (unlink(addr.sun_path) == -1 && errno != ENOENT) {
+        goto err;
+    }
+    if (bind(fd, (struct sockaddr*)&addr, sizeof(addr)) == -1) {
+        goto err;
+    }
+    if (listen(fd, 0) == -1) {
+        goto err;
+    }
+    if ((conn_fd = accept(fd, NULL, NULL)) == -1) {
+        goto err;
+    }
+
+    close(fd);
+    return conn_fd;
+
+err:
+    /* The cleanup calls below may clobber errno; report the original cause. */
+    saved_errno = errno;
+    if (lm_ctx->iommu_dir_fd != -1) {
+        unlinkat(lm_ctx->iommu_dir_fd, MUSER_SOCK, 0);
+        unlinkat(lm_ctx->iommu_dir_fd, IOMMU_GRP_NAME, 0);
+        close(lm_ctx->iommu_dir_fd);
+        lm_ctx->iommu_dir_fd = -1;
+    }
+    if (dir_created) {
+        rmdir(lm_ctx->iommu_dir);
+    }
+    free(lm_ctx->iommu_dir);
+    lm_ctx->iommu_dir = NULL;
+    close(fd);
+    errno = saved_errno;
+    return -1;
+}
+
+/*
+ * get_request callback of the socket transport: reads exactly one
+ * struct muser_cmd from the stream socket into *cmd.
+ *
+ * A stream socket may deliver a command in several pieces, so read(2) is
+ * repeated until the whole structure has arrived.
+ *
+ * Returns sizeof *cmd on success, 0 if the peer closed the connection before
+ * sending anything, and -1 with errno set on error. A connection that closes
+ * in the middle of a command is reported as -1/EIO. Interrupted reads are not
+ * retried, so a signal still makes the caller return.
+ */
+static int
+get_request_sock(int fd, struct muser_cmd *cmd)
+{
+    char *buf = (char *)cmd;
+    size_t done = 0;
+
+    while (done < sizeof *cmd) {
+        ssize_t ret = read(fd, buf + done, sizeof *cmd - done);
+
+        if (ret < 0) {
+            return -1;
+        }
+        if (ret == 0) {
+            if (done == 0) {
+                return 0;
+            }
+            errno = EIO;
+            return -1;
+        }
+        done += (size_t)ret;
+    }
+    return (int)done;
+}
+
+/*
+ * send_response callback of the socket transport: writes the complete
+ * struct muser_cmd in *cmd to the stream socket.
+ *
+ * write(2) on a socket may accept only part of the buffer, so it is repeated
+ * until the whole structure has been sent; otherwise the peer would be left
+ * waiting for the rest of the response.
+ *
+ * Returns sizeof *cmd on success and -1 with errno set on error. Interrupted
+ * writes are not retried.
+ */
+static int
+send_response_sock(int fd, struct muser_cmd *cmd)
+{
+    const char *buf = (const char *)cmd;
+    size_t done = 0;
+
+    while (done < sizeof *cmd) {
+        ssize_t ret = write(fd, buf + done, sizeof *cmd - done);
+
+        if (ret < 0) {
+            return -1;
+        }
+        if (ret == 0) {
+            /* no progress: bail out instead of spinning */
+            errno = EIO;
+            return -1;
+        }
+        done += (size_t)ret;
+    }
+    return (int)done;
+}
+
static void
get_path_from_fd(int fd, char *buf)
{
@@ -97,19 +258,37 @@ get_path_from_fd(int fd, char *buf)
buf[ret] = '\0';
}
-struct lm_ctx {
- void *pvt;
- dma_controller_t *dma;
- int fd;
- int (*reset) (void *pvt);
- lm_log_lvl_t log_lvl;
- lm_log_fn_t *log;
- lm_pci_info_t pci_info;
- lm_pci_config_space_t *pci_config_space;
- struct caps *caps;
- lm_irqs_t irqs; /* XXX must be last */
+/*
+ * recv_fds callback of the socket transport.
+ *
+ * Adapts muser_recv_fds(), which counts in file descriptors, to the
+ * byte-oriented recv_fds signature in struct transport_ops: size is the
+ * capacity of buf in bytes and the return value is the number of bytes
+ * stored (descriptors received times sizeof(int)), or a negative value on
+ * error.
+ */
+ssize_t recv_fds_sock(int fd, void *buf, size_t size)
+{
+ ssize_t ret = muser_recv_fds(fd, buf, size / sizeof(int));
+ if (ret < 0) {
+ return ret;
+ }
+ return ret * sizeof(int);
+}
+
+/*
+ * Per-transport operations, indexed by lm_trans_t.
+ *
+ * attach returns the descriptor used for all further communication, detach
+ * releases it, get_request/send_response move one struct muser_cmd, and
+ * recv_fds fetches the extra data that accompanies an ioctl command (plain
+ * read(2) for the kernel transport, SCM_RIGHTS descriptors for the socket
+ * transport).
+ */
+static struct transport_ops {
+ int (*attach)(lm_ctx_t*, const char*);
+ int(*detach)(int fd);
+ int (*get_request)(int fd, struct muser_cmd*);
+ int (*send_response)(int fd, struct muser_cmd*);
+ ssize_t (*recv_fds)(int fd, void *buf, size_t size);
+} transports_ops[] = {
+ [LM_TRANS_KERNEL] = {
+ .attach = dev_attach,
+ .detach = dev_detach,
+ .recv_fds = read,
+ .get_request = get_request_kernel,
+ .send_response = send_response_kernel
+ },
+ [LM_TRANS_SOCK] = {
+ .attach = open_sock,
+ .detach = close,
+ .recv_fds = recv_fds_sock,
+ .get_request = get_request_sock,
+ .send_response = send_response_sock
+ }
+};
-MUST_BE_LAST(struct lm_ctx, irqs, lm_irqs_t);
#define LM2VFIO_IRQT(type) (type - 1)
@@ -558,7 +737,56 @@ do_muser_ioctl(lm_ctx_t *lm_ctx, struct muser_cmd_ioctl *cmd_ioctl, void *data)
return lm_ctx->reset(lm_ctx->pvt);
}
lm_log(lm_ctx, LM_DBG, "reset called but not reset function present\n");
+ err = 0;
+ break;
+ case VFIO_GROUP_GET_STATUS:
+ cmd_ioctl->data.group_status.flags = VFIO_GROUP_FLAGS_VIABLE;
+ err = 0;
+ break;
+ case VFIO_GET_API_VERSION:
+ cmd_ioctl->data.vfio_api_version = VFIO_API_VERSION;
+ err = 0;
break;
+ case VFIO_CHECK_EXTENSION:
+ if (cmd_ioctl->data.vfio_extension == VFIO_TYPE1v2_IOMMU) {
+ err = 0;
+ }
+ break;
+ case VFIO_IOMMU_GET_INFO:
+ cmd_ioctl->data.iommu_type1_info.flags = VFIO_IOMMU_INFO_PGSIZES;
+ cmd_ioctl->data.iommu_type1_info.iova_pgsizes = sysconf(_SC_PAGESIZE);
+ err = 0;
+ break;
+ case VFIO_IOMMU_MAP_DMA:
+ {
+ struct muser_cmd muser_cmd = {
+ .type = MUSER_DMA_MMAP,
+ .mmap.request.fd = *((int*)data),
+ .mmap.request.addr = cmd_ioctl->data.dma_map.iova,
+ .mmap.request.len = cmd_ioctl->data.dma_map.size,
+ .mmap.request.offset = cmd_ioctl->data.dma_map.vaddr
+ };
+ err = muser_dma_map(lm_ctx, &muser_cmd);
+ }
+ break;
+ case VFIO_IOMMU_UNMAP_DMA:
+ {
+ struct muser_cmd muser_cmd = {
+ .type = MUSER_DMA_MUNMAP,
+ .mmap.request.addr = cmd_ioctl->data.dma_unmap.iova,
+ .mmap.request.len = cmd_ioctl->data.dma_unmap.size
+ };
+ err = muser_dma_unmap(lm_ctx, &muser_cmd);
+ }
+ break;
+ /* FIXME */
+ case VFIO_GROUP_SET_CONTAINER:
+ case VFIO_GROUP_UNSET_CONTAINER:
+ case VFIO_SET_IOMMU:
+ err = 0;
+ break;
+ default:
+ lm_log(lm_ctx, LM_ERR, "bad comamnd %d", cmd_ioctl->vfio_cmd);
}
return err;
@@ -618,13 +846,98 @@ muser_dma_map(lm_ctx_t *lm_ctx, struct muser_cmd *cmd)
lm_log(lm_ctx, LM_ERR, "failed to add DMA region fd=%d path=%s %#lx-%#lx: %d\n",
cmd->mmap.request.fd, buf, cmd->mmap.request.addr,
cmd->mmap.request.addr + cmd->mmap.request.len, err);
+ } else {
+ err = 0;
}
- return 0;
+ return err;
+}
+
+/*
+ * Sends count file descriptors from fds over the UNIX socket sock as a single
+ * SCM_RIGHTS control message, accompanied by one dummy data byte.
+ *
+ * The control buffer is heap-allocated rather than a variable-length array so
+ * that a large count cannot overflow the stack.
+ *
+ * Returns the result of sendmsg(2) (the number of data bytes sent, i.e. 1) on
+ * success, or -1 with errno set. fds == NULL, count == 0 and a count whose
+ * byte size would overflow are rejected with EINVAL.
+ */
+int
+muser_send_fds(int sock, int *fds, size_t count) {
+    /* XXX requires at least one byte */
+    char dummy = '\0';
+    struct iovec io = { .iov_base = &dummy, .iov_len = 1 };
+    struct msghdr msg = { 0 };
+    struct cmsghdr *cmsg;
+    size_t size;
+    char *buf;
+    int ret, saved_errno;
+
+    if (fds == NULL || count == 0 || count > INT_MAX / sizeof *fds) {
+        errno = EINVAL;
+        return -1;
+    }
+    size = count * sizeof *fds;
+
+    buf = calloc(1, CMSG_SPACE(size));
+    if (buf == NULL) {
+        return -1;
+    }
+
+    msg.msg_iov = &io;
+    msg.msg_iovlen = 1;
+    msg.msg_control = buf;
+    msg.msg_controllen = CMSG_SPACE(size);
+
+    cmsg = CMSG_FIRSTHDR(&msg);
+    cmsg->cmsg_level = SOL_SOCKET;
+    cmsg->cmsg_type = SCM_RIGHTS;
+    cmsg->cmsg_len = CMSG_LEN(size);
+    memcpy(CMSG_DATA(cmsg), fds, size);
+
+    ret = (int)sendmsg(sock, &msg, 0);
+
+    /* free() may change errno; keep the value sendmsg() left behind */
+    saved_errno = errno;
+    free(buf);
+    errno = saved_errno;
+
+    return ret;
+}
+
+/*
+ * Receives one message from the UNIX socket sock and copies the file
+ * descriptors carried in its first control message into fds, which has room
+ * for count descriptors.
+ *
+ * Returns the number of descriptors stored, or -1 with errno set. EINVAL is
+ * reported when the arguments are invalid, when the message carries no
+ * SOL_SOCKET/SCM_RIGHTS control message, or when the descriptor payload is
+ * malformed or larger than count. Descriptors that cannot be handed to the
+ * caller are closed so they do not leak into this process.
+ */
+ssize_t
+muser_recv_fds(int sock, int *fds, size_t count)
+{
+    ssize_t ret;
+    struct cmsghdr *cmsg;
+    size_t fds_size;
+    char msg_buf[sysconf(_SC_PAGESIZE)];
+    struct iovec io = {.iov_base = msg_buf, .iov_len = sizeof(msg_buf)};
+    char cmsg_buf[sysconf(_SC_PAGESIZE)];
+    struct msghdr msg = {
+        .msg_iov = &io,
+        .msg_iovlen = 1,
+        .msg_control = cmsg_buf,
+        .msg_controllen = sizeof(cmsg_buf)
+    };
+
+    if (fds == NULL || count == 0) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    ret = recvmsg(sock, &msg, 0);
+    if (ret == -1) {
+        return ret;
+    }
+
+    cmsg = CMSG_FIRSTHDR(&msg);
+    /* Only a well-formed SCM_RIGHTS message carries descriptors. */
+    if (cmsg == NULL || cmsg->cmsg_level != SOL_SOCKET ||
+        cmsg->cmsg_type != SCM_RIGHTS || cmsg->cmsg_len < CMSG_LEN(0)) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    /*
+     * The payload starts at CMSG_LEN(0), i.e. after the header plus any
+     * alignment padding, which is not necessarily sizeof(struct cmsghdr).
+     */
+    fds_size = cmsg->cmsg_len - CMSG_LEN(0);
+    if ((fds_size % sizeof(int)) != 0 || fds_size / sizeof(int) > count) {
+        size_t off;
+
+        /* The descriptors are already installed; close them before bailing. */
+        for (off = 0; off + sizeof(int) <= fds_size; off += sizeof(int)) {
+            int fd;
+
+            memcpy(&fd, CMSG_DATA(cmsg) + off, sizeof fd);
+            close(fd);
+        }
+        errno = EINVAL;
+        return -1;
+    }
+    memcpy(fds, CMSG_DATA(cmsg), fds_size);
+
+    return (ssize_t)(fds_size / sizeof(int));
+}
/*
* Callback that is executed when device memory is to be mmap'd.
+ *
+ * TODO vfio-over-socket: each PCI region can be sparsely memory mapped, so
+ * there can be multiple mapped regions per PCI region. We need to make these
+ * mapped regions persistent. One way would be to store each sparse region as
+ * an individual file named after the memory range, e.g.
+ * /dev/shm/muser/<UUID>/<region>/<offset>-<length> (the <region> can be <bar0>,
+ * <rom> etc.).
+ *
+ * Another way would be to create one file per PCI region and then
+ * tell libvfio which offset of each file corresponds to each region. The
+ * mapping between sparse regions and file offsets can be 1:1, so there can be
+ * large gaps in file which should be fine since it will be sparsely allocated.
+ * Alternatively, each sparse region can be put right next to each other so
+ * we'll need some kind of translation.
+ *
+ * However this functionality is implemented, it must be provided by libmuser.
+ * For now we don't do anything (except for receiving the file descriptors)
+ * and leave it to the device implementation to handle.
*/
static int
muser_mmap(lm_ctx_t *lm_ctx, struct muser_cmd *cmd)
@@ -655,6 +968,16 @@ muser_mmap(lm_ctx_t *lm_ctx, struct muser_cmd *cmd)
}
cmd->mmap.response = addr;
+ /* FIXME */
+ if (lm_ctx->trans == LM_TRANS_SOCK) {
+ err = muser_send_fds(lm_ctx->fd, (int*)&addr, 1);
+ if (err == -1) {
+ lm_log(lm_ctx, LM_ERR, "failed to send fd=%d: %d, %m\n",
+ *((int*)&addr), err);
+ }
+ err = 0;
+ }
+
out:
if (err != 0) {
lm_log(lm_ctx, LM_ERR, "failed to mmap device memory %#x-%#lx: %s\n",
@@ -837,10 +1160,8 @@ muser_access(lm_ctx_t *lm_ctx, struct muser_cmd *cmd, bool is_write)
return -1;
}
-#ifndef LM_TERSE_LOGGING
- lm_log(lm_ctx, LM_DBG, "%s %x@%lx\n", is_write ? "W" : "R", cmd->rw.count,
- cmd->rw.pos);
-#endif
+ lm_log(lm_ctx, LM_DBG, "%s %#lx-%#lx\n", is_write ? "W" : "R", cmd->rw.pos,
+ cmd->rw.pos + cmd->rw.count);
/* copy data to be written from kernel to user space */
if (is_write) {
@@ -898,10 +1219,12 @@ muser_ioctl(lm_ctx_t *lm_ctx, struct muser_cmd *cmd)
void *data = NULL;
size_t size = 0;
int ret;
+ uint32_t flags;
/* TODO make this a function that returns the size */
- if (cmd->ioctl.vfio_cmd == VFIO_DEVICE_SET_IRQS) {
- uint32_t flags = cmd->ioctl.data.irq_set.flags;
+ switch (cmd->ioctl.vfio_cmd) {
+ case VFIO_DEVICE_SET_IRQS:
+ flags = cmd->ioctl.data.irq_set.flags;
switch ((flags & VFIO_IRQ_SET_DATA_TYPE_MASK)) {
case VFIO_IRQ_SET_DATA_EVENTFD:
size = sizeof(int32_t) * cmd->ioctl.data.irq_set.count;
@@ -910,24 +1233,28 @@ muser_ioctl(lm_ctx_t *lm_ctx, struct muser_cmd *cmd)
size = sizeof(uint8_t) * cmd->ioctl.data.irq_set.count;
break;
}
+ break;
+ case VFIO_IOMMU_MAP_DMA:
+ size = sizeof(int);
+ break;
}
if (size != 0) {
- data = calloc(1, size);
+ data = calloc(1, size); /* TODO use alloca */
if (data == NULL) {
#ifdef DEBUG
perror("calloc");
#endif
return -1;
}
-
- ret = read(lm_ctx->fd, data, size);
+ ret = transports_ops[lm_ctx->trans].recv_fds(lm_ctx->fd, data, size);
if (ret < 0) {
-#ifdef DEBUG
- perror("read failed");
-#endif
goto out;
}
+ if (ret != (int)size) {
+ lm_log(lm_ctx, LM_ERR, "short read for fds\n");
+ return -EINVAL;
+ }
}
ret = (int)do_muser_ioctl(lm_ctx, &cmd->ioctl, data);
@@ -945,10 +1272,15 @@ drive_loop(lm_ctx_t *lm_ctx)
int err;
do {
- err = ioctl(lm_ctx->fd, MUSER_DEV_CMD_WAIT, &cmd);
+ err = transports_ops[lm_ctx->trans].get_request(lm_ctx->fd, &cmd);
if (err < 0) {
+ lm_log(lm_ctx, LM_ERR, "failed to receive request: %m\n");
return err;
}
+ if (err == 0) {
+ lm_log(lm_ctx, LM_INF, "end of file: %m\n");
+ break;
+ }
switch (cmd.type) {
case MUSER_IOCTL:
@@ -969,10 +1301,14 @@ drive_loop(lm_ctx_t *lm_ctx)
break;
default:
lm_log(lm_ctx, LM_ERR, "bad command %d\n", cmd.type);
- continue;
+ /*
+ * TODO should respond with something here instead of ignoring the
+ * command.
+ */
+ err = -EINVAL;
}
cmd.err = err;
- err = ioctl(lm_ctx->fd, MUSER_DEV_CMD_DONE, &cmd);
+ err = transports_ops[lm_ctx->trans].send_response(lm_ctx->fd, &cmd);
if (err < 0) {
lm_log(lm_ctx, LM_ERR, "failed to complete command: %s\n",
strerror(errno));
@@ -994,31 +1330,7 @@ lm_ctx_drive(lm_ctx_t *lm_ctx)
return drive_loop(lm_ctx);
}
-static int
-dev_detach(int dev_fd)
-{
- return close(dev_fd);
-}
-
-static int
-dev_attach(const char *uuid)
-{
- char *path;
- int dev_fd;
- int err;
-
- err = asprintf(&path, "/dev/" MUSER_DEVNODE "/%s", uuid);
- if (err != (int)(strlen(MUSER_DEVNODE) + strlen(uuid) + 6)) {
- return -1;
- }
-
- dev_fd = open(path, O_RDWR);
-
- free(path);
-
- return dev_fd;
-}
-
+/* FIXME this is not enough anymore, check muser_mmap */
void *
lm_mmap(lm_ctx_t *lm_ctx, off_t offset, size_t length)
{
@@ -1076,11 +1388,40 @@ lm_ctx_destroy(lm_ctx_t *lm_ctx)
return;
}
+ /*
+ * FIXME The following cleanup can be dangerous depending on how lm_ctx_destroy
+ * is called since it might delete files it did not create. Improve by
+ * acquiring a lock on the directory.
+ */
+ if (lm_ctx->trans == LM_TRANS_SOCK) {
+ int ret;
+
+ if (lm_ctx->iommu_dir_fd != -1) {
+ if ((ret = unlinkat(lm_ctx->iommu_dir_fd, IOMMU_GRP_NAME, 0)) == -1 && errno != ENOENT) {
+ lm_log(lm_ctx, LM_DBG, "failed to remove " IOMMU_GRP_NAME ": %m\n");
+ }
+ if ((ret = unlinkat(lm_ctx->iommu_dir_fd, MUSER_SOCK, 0)) == -1 && errno != ENOENT) {
+ lm_log(lm_ctx, LM_DBG, "failed to remove " MUSER_SOCK ": %m\n");
+ }
+ if (close(lm_ctx->iommu_dir_fd) == -1) {
+ lm_log(lm_ctx, LM_DBG, "failed to close IOMMU dir fd %d: %m\n",
+ lm_ctx->iommu_dir_fd);
+ }
+ }
+ if (lm_ctx->iommu_dir != NULL) {
+ if ((ret = rmdir(lm_ctx->iommu_dir)) == -1 && errno != ENOENT) {
+ lm_log(lm_ctx, LM_DBG, "failed to remove %s: %m\n", lm_ctx->iommu_dir);
+ }
+ free(lm_ctx->iommu_dir);
+ }
+ }
+
free(lm_ctx->pci_config_space);
- dev_detach(lm_ctx->fd);
+ transports_ops[lm_ctx->trans].detach(lm_ctx->fd);
if (lm_ctx->dma != NULL) {
dma_controller_destroy(lm_ctx, lm_ctx->dma);
}
+ free_sparse_mmap_areas(lm_ctx->pci_info.reg_info);
free(lm_ctx);
// FIXME: Maybe close any open irq efds? Unmap stuff?
}
@@ -1203,6 +1544,11 @@ lm_ctx_create(const lm_dev_info_t *dev_info)
return NULL;
}
+ if (dev_info->trans < 0 || dev_info->trans >= LM_TRANS_MAX) {
+ errno = EINVAL;
+ return NULL;
+ }
+
/*
* FIXME need to check that the number of MSI and MSI-X IRQs are valid
* (1, 2, 4, 8, 16 or 32 for MSI and up to 2048 for MSI-X).
@@ -1221,6 +1567,9 @@ lm_ctx_create(const lm_dev_info_t *dev_info)
if (lm_ctx == NULL) {
return NULL;
}
+ lm_ctx->trans = dev_info->trans;
+
+ lm_ctx->iommu_dir_fd = -1;
// Set context irq information.
for (i = 0; i < max_ivs; i++) {
@@ -1254,9 +1603,12 @@ lm_ctx_create(const lm_dev_info_t *dev_info)
}
// Attach to the muser control device.
- lm_ctx->fd = dev_attach(dev_info->uuid);
+ lm_ctx->fd = transports_ops[dev_info->trans].attach(lm_ctx, dev_info->uuid);
if (lm_ctx->fd == -1) {
err = errno;
+ if (errno != EINTR) {
+ lm_log(lm_ctx, LM_ERR, "failed to attach: %m\n");
+ }
goto out;
}
@@ -1271,7 +1623,7 @@ out:
if (err) {
if (lm_ctx) {
dma_controller_destroy(lm_ctx, lm_ctx->dma);
- dev_detach(lm_ctx->fd);
+ transports_ops[dev_info->trans].detach(lm_ctx->fd);
free_sparse_mmap_areas(lm_ctx->pci_info.reg_info);
free(lm_ctx->pci_config_space);
free(lm_ctx);
diff --git a/lib/muser.h b/lib/muser.h
index f3330fe..3f3a9fa 100644
--- a/lib/muser.h
+++ b/lib/muser.h
@@ -50,6 +50,12 @@
#define LM_TERSE_LOGGING 0
#endif
+/*
+ * Paths of the (emulated) VFIO device nodes.
+ *
+ * VFIO_DIR already ends in '/', so names are appended to it directly:
+ * VFIO_CONTAINER is "/dev/vfio/vfio". Adding another "/" would yield
+ * "/dev/vfio//vfio", which still resolves on disk but does not compare equal
+ * to the canonical path as a string.
+ */
+#define VFIO_NAME "vfio"
+#define VFIO_DIR "/dev/" VFIO_NAME "/"
+#define VFIO_CONTAINER VFIO_DIR VFIO_NAME
+
+#define MUSER_SOCK "cntrl"
+
typedef uint64_t dma_addr_t;
typedef struct {
@@ -149,7 +155,9 @@ enum {
LM_DEV_INTX_IRQ_IDX,
LM_DEV_MSI_IRQ_IDX,
LM_DEV_MSIX_IRQ_IDX,
- LM_DEV_NUM_IRQS = 3
+ LM_DEV_ERR_IRQ_INDEX,
+ LM_DEV_REQ_IRQ_INDEX,
+ LM_DEV_NUM_IRQS
};
enum {
@@ -247,6 +255,12 @@ typedef struct {
lm_cap_access_t *fn;
} lm_cap_t;
+/*
+ * Transport used to exchange commands with the client: the muser kernel
+ * module or a UNIX domain socket. LM_TRANS_MAX is not a transport; it marks
+ * the end of the valid range and is used for bounds checking.
+ */
+typedef enum {
+ LM_TRANS_KERNEL,
+ LM_TRANS_SOCK,
+ LM_TRANS_MAX
+} lm_trans_t;
+
#define LM_MAX_CAPS (PCI_CFG_SPACE_SIZE - PCI_STD_HEADER_SIZEOF) / PCI_CAP_SIZEOF
/**
@@ -297,6 +311,8 @@ typedef struct {
* Number of capabilities in above array.
*/
int nr_caps;
+
+ lm_trans_t trans;
} lm_dev_info_t;
/**
@@ -436,6 +452,10 @@ lm_get_region(loff_t pos, size_t count, loff_t *off);
uint8_t *
lm_get_pci_non_std_config_space(lm_ctx_t *lm_ctx);
+/* FIXME */
+int muser_send_fds(int sock, int *fds, size_t count);
+ssize_t muser_recv_fds(int sock, int *fds, size_t count);
+
#endif /* LIB_MUSER_H */
/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/libpathtrap b/libpathtrap
new file mode 160000
+Subproject 7a3a8242c9b31c39b26f4e2cf7f07659d7e272f
diff --git a/libvfio/CMakeLists.txt b/libvfio/CMakeLists.txt
new file mode 100644
index 0000000..b6af4c9
--- /dev/null
+++ b/libvfio/CMakeLists.txt
@@ -0,0 +1,35 @@
+#
+# Copyright (c) 2019 Nutanix Inc. All rights reserved.
+#
+# Authors: Thanos Makatos <thanos@nutanix.com>
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Nutanix nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+add_library(vfio SHARED
+ ../libpathtrap/libpathtrap.h
+ ../libpathtrap/vma_addr.h
+ libvfio.c
+)
+target_link_libraries(vfio muser dl ${CMAKE_BINARY_DIR}/libpathtrap/libpathtrap.a)
+set(CMAKE_C_FLAGS "-Wall -Wextra -Werror -ldl")
diff --git a/libvfio/libvfio.c b/libvfio/libvfio.c
new file mode 100644
index 0000000..2487546
--- /dev/null
+++ b/libvfio/libvfio.c
@@ -0,0 +1,513 @@
+/*
+ * Copyright (c) 2020 Nutanix Inc. All rights reserved.
+ *
+ * Authors: Thanos Makatos <thanos@nutanix.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Nutanix nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <stddef.h>
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/vfio.h>
+#include <string.h>
+#include <stdio.h>
+#include <linux/muser.h>
+#include <muser/muser.h>
+#include <muser/pci.h>
+#include <sys/socket.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <sys/un.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include <dlfcn.h>
+
+#include "../libpathtrap/libpathtrap.h"
+#include "../libpathtrap/vma_addr.h"
+
+/*
+ * debug() prints a printf-style message to stderr, prefixed with the source
+ * file and line, when DEBUG is defined. Without DEBUG it does nothing.
+ */
+#ifdef DEBUG
+#define debug(fmt, ...) \
+    do { \
+        fprintf(stderr, "%s:%d " fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
+    } while (0)
+#else
+/*
+ * Expand to a complete (empty) statement rather than to nothing, so that
+ * "if (x) debug(...);" does not degrade into an empty if body.
+ */
+#define debug(fmt, ...) do { } while (0)
+#endif
+
+/* Name of the VFIO subsystem as it appears under /dev. */
+#define VFIO_NAME "vfio"
+/* Directory that holds the container and group nodes; ends with a '/'. */
+#define VFIO_DIR "/dev/" VFIO_NAME "/"
+/*
+ * Path of the container node. VFIO_DIR already ends with the separator, so
+ * none is added here (adding one would yield "/dev/vfio//vfio").
+ */
+#define VFIO_CONTAINER VFIO_DIR VFIO_NAME
+
+/*
+ * Socket used by the handlers in this file to talk to the server; -1 until
+ * open_sock() has created it.
+ */
+static int sock = -1;
+
+/* Kind of VFIO object a trapped file descriptor stands for (set in __open()). */
+enum vfio_fd_type {
+ VFIO_FD_TYPE_CONTAINER,
+ VFIO_FD_TYPE_GROUP,
+ VFIO_FD_TYPE_DEVICE
+};
+
+/* Per-descriptor state that __open() attaches to a fake file descriptor. */
+struct vfio_fd {
+ enum vfio_fd_type type;
+ unsigned long iommu_grp; /* VFIO_FD_TYPE_GROUP only */
+};
+
+/*
+ * Open handler for trapped paths.
+ *
+ * Creates a memfd named after pathname to serve as the file descriptor and
+ * attaches a freshly allocated struct vfio_fd to fake_fd that records what
+ * the descriptor stands for:
+ *  - VFIO_DIR followed by VFIO_NAME:        the container
+ *  - VFIO_DIR followed by a decimal number: a group, the number being the
+ *                                           IOMMU group
+ *  - any other path:                        a device, accepted only when priv
+ *                                           points to a bool that is true
+ *
+ * Returns the new file descriptor, or -1 with errno set on failure.
+ */
+int __open(struct fake_fd *fake_fd, const char *pathname,
+           int flags __attribute__((unused)), void *priv) {
+
+    int fd = -1;
+    int err = 0;
+    struct vfio_fd *vfio_fd = calloc(1, sizeof *vfio_fd);
+
+    if (!vfio_fd)
+        return -1;
+
+    if ((fd = syscall(SYS_memfd_create, pathname, 0)) == -1) {
+        err = errno;
+        goto out;
+    }
+
+    if (!strncmp(pathname, VFIO_DIR, sizeof VFIO_DIR - 1)) {
+        /* Part of the path that follows the VFIO directory prefix. */
+        const char *name = pathname + sizeof VFIO_DIR - 1;
+
+        if (!strcmp(name, VFIO_NAME)) {
+            vfio_fd->type = VFIO_FD_TYPE_CONTAINER;
+            debug("container fd=%d\n", fd);
+        } else {
+            char *endptr;
+
+            /* strtoul() reports overflow only through errno, so clear it. */
+            errno = 0;
+            vfio_fd->iommu_grp = strtoul(name, &endptr, 10);
+            /* Reject an empty name, trailing garbage and out-of-range values. */
+            if (endptr == name || *endptr != '\0' || errno == ERANGE) {
+                err = EINVAL;
+                goto out;
+            }
+            vfio_fd->type = VFIO_FD_TYPE_GROUP;
+            debug("group fd=%d\n", fd);
+        }
+    } else {
+        /*
+         * A path outside VFIO_DIR is only valid as a device, which the caller
+         * signals by passing a pointer to a true flag.
+         */
+        if (!priv || !*(bool *)priv) {
+            debug("bad path %s\n", pathname);
+            err = EINVAL;
+            goto out;
+        }
+        debug("device fd=%d\n", fd);
+        vfio_fd->type = VFIO_FD_TYPE_DEVICE;
+    }
+    fake_fd_set_priv(fake_fd, (void*)vfio_fd);
+out:
+    if (err) {
+        if (fd != -1)
+            close(fd);
+        free(vfio_fd);
+        errno = err;
+        return -1;
+    }
+    return fd;
+}
+
+/*
+ * Close handler: hands the descriptor stored in fake_fd to __real_close() and
+ * returns its result.
+ */
+int __close(struct fake_fd *fake_fd) {
+    const int fd = fake_fd->fd;
+
+    return __real_close(fd);
+}
+
+/*
+ * Tells whether pathname has to be intercepted: true for every path that
+ * starts with VFIO_DIR, false otherwise.
+ *
+ * FIXME should only trap /dev/vfio/vfio and /dev/vfio/[0-9]+ in order to
+ * allow real VFIO devices to work, however since we already trap
+ * /dev/vfio/vfio those devices cannot work in the first place.
+ */
+bool __should_trap(const char *pathname) {
+    const size_t prefix_len = sizeof VFIO_DIR - 1;
+
+    if (strncmp(pathname, VFIO_DIR, prefix_len) != 0)
+        return false;
+    return true;
+}
+
+/*
+ * Read handler: asks the peer on sock for count bytes at *offset and stores
+ * them in buf.
+ *
+ * The exchange is: send a MUSER_READ command, receive count bytes of data,
+ * then receive a struct muser_cmd whose err field carries the outcome.
+ *
+ * Returns count on success. A response that reports a different non-negative
+ * value is returned as is. On failure returns -1 with errno set: a negative
+ * err in the response is negated into errno (as __write() does), and a
+ * truncated transfer is reported as EIO.
+ */
+ssize_t __read(struct fake_fd *fake_fd __attribute__((unused)),
+               void *buf, size_t count, off_t *offset) {
+
+    struct muser_cmd muser_cmd = {
+        .type = MUSER_READ,
+        .rw = {
+            .count = count,
+            .pos = *offset
+        }
+    };
+    ssize_t ret; /* signed, so that a -1 from the socket calls is kept */
+
+#if 0
+    debug("R fd=%d %s %#lx-%#lx\n",
+          fake_fd->fd, fake_fd->pathname, *offset, *offset + count);
+#endif
+
+    /*
+     * FIXME need to set fd (which is the device fd) in muser_cmd
+     * so that it knows for which device it is.
+     */
+    ret = __real_write(sock, &muser_cmd, sizeof muser_cmd);
+    if (ret != (ssize_t)sizeof muser_cmd) {
+        debug("failed to send command: %m\n");
+        goto fail;
+    }
+    ret = __real_read(sock, buf, count);
+    if (ret < 0 || (size_t)ret != count) {
+        debug("failed to read data: %m\n");
+        goto fail;
+    }
+    ret = __real_read(sock, &muser_cmd, sizeof muser_cmd);
+    if (ret != (ssize_t)sizeof muser_cmd) {
+        debug("failed to read response: %m\n");
+        goto fail;
+    }
+    if (muser_cmd.err < 0) {
+        debug("bad response: %d\n", muser_cmd.err);
+        errno = -muser_cmd.err;
+        return -1;
+    }
+    if (muser_cmd.err != (int)count) {
+        debug("bad response: %d\n", muser_cmd.err);
+        return muser_cmd.err;
+    }
+    return count;
+
+fail:
+    /* A short transfer does not set errno; report it as an I/O error. */
+    if (ret >= 0)
+        errno = EIO;
+    return -1;
+}
+
+/*
+ * Write handler: sends count bytes from buf to the peer on sock, to be
+ * written at *offset.
+ *
+ * The exchange is: send a MUSER_WRITE command, send the data, then receive a
+ * struct muser_cmd whose err field carries the outcome.
+ *
+ * Returns count on success. On failure returns -1 with errno set: a non-zero
+ * err in the response is negated into errno, and a truncated transfer is
+ * reported as EIO.
+ */
+ssize_t __write(struct fake_fd *fake_fd __attribute__((unused)),
+                const void *buf, size_t count, off_t *offset) {
+
+    struct muser_cmd muser_cmd = {
+        .type = MUSER_WRITE,
+        .rw = {
+            .count = count,
+            .pos = *offset
+        }
+    };
+    ssize_t ret; /* wide enough for any count, unlike int */
+
+#if 0
+    debug("W fd=%d %s %#lx-%#lx\n",
+          fake_fd->fd, fake_fd->pathname, *offset, *offset + count);
+#endif
+
+    /*
+     * FIXME need to set fd (which is the device fd) in muser_cmd
+     * so that it knows for which device it is.
+     */
+    ret = __real_write(sock, &muser_cmd, sizeof muser_cmd);
+    if (ret != (ssize_t)sizeof muser_cmd) {
+        debug("failed to send command (%zd): %m\n", ret);
+        goto fail;
+    }
+    ret = __real_write(sock, buf, count);
+    if (ret < 0 || (size_t)ret != count) {
+        debug("failed to send data (%zd): %m\n", ret);
+        goto fail;
+    }
+    ret = __real_read(sock, &muser_cmd, sizeof muser_cmd);
+    if (ret != (ssize_t)sizeof muser_cmd) {
+        debug("failed to receive response (%zd): %m\n", ret);
+        goto fail;
+    }
+    if (muser_cmd.err) {
+        debug("command failed: %s\n", strerror(-muser_cmd.err));
+        errno = -muser_cmd.err;
+        return -1;
+    }
+    return count;
+
+fail:
+    /* A short transfer does not set errno; report it as an I/O error. */
+    if (ret >= 0)
+        errno = EIO;
+    return -1;
+}
+
+/*
+ * Creates the UNIX stream socket stored in the global sock and connects it to
+ * VFIO_DIR "<iommu_grp>/" MUSER_SOCK, using the group number in vfio_fd.
+ *
+ * Returns the connected socket on success. On failure returns -1 with errno
+ * set and leaves sock at -1, so that a later call can try again.
+ */
+static int open_sock(struct vfio_fd *vfio_fd) {
+
+    int ret;
+    int saved_errno;
+    struct sockaddr_un addr = {.sun_family = AF_UNIX};
+
+    assert(vfio_fd);
+
+    ret = snprintf(addr.sun_path, sizeof addr.sun_path,
+                   VFIO_DIR "%lu/" MUSER_SOCK, vfio_fd->iommu_grp);
+    /* Refuse to connect to a silently truncated path. */
+    if (ret < 0 || (size_t)ret >= sizeof addr.sun_path) {
+        debug("socket path too long\n");
+        errno = ENAMETOOLONG;
+        return -1;
+    }
+
+    if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
+        debug("failed to open socket: %m\n");
+        return sock;
+    }
+
+    if (connect(sock, (struct sockaddr*)&addr, sizeof(addr)) == -1) {
+        saved_errno = errno;
+        debug("failed to connect to %s: %m\n", addr.sun_path);
+        /*
+         * Drop the unconnected socket and reset the global; otherwise callers
+         * that test sock against -1 would use it as if it were connected.
+         */
+        close(sock);
+        sock = -1;
+        errno = saved_errno;
+        return -1;
+    }
+
+    return sock;
+}
+
+/*
+ * Produces a device file descriptor by opening the fake file "device_fd" via
+ * open_fake(), passing a pointer to a true flag as its last argument. Both
+ * parameters are currently unused.
+ *
+ * FIXME need to generate name based on passed fake_fd
+ * FIXME need to associate the fd we return with the passed fake_fd
+ */
+static int vfio_group_get_device_fd(struct fake_fd *fake_fd __attribute__((unused)),
+                                    unsigned long args __attribute__((unused))) {
+    bool is_device = true;
+
+    return open_fake("device_fd", 0, &is_device);
+}
+
+/*
+ * Sends the file descriptors in fds over sock with muser_send_fds().
+ *
+ * size is the length in bytes of the fds array and must equal one int per
+ * IRQ counted in muser_cmd's irq_set; otherwise -1 is returned with errno set
+ * to EINVAL. On a match the result of muser_send_fds() is returned.
+ */
+static int vfio_set_data_eventfd(struct muser_cmd *muser_cmd, int *fds, size_t size) {
+    const size_t expected = muser_cmd->ioctl.data.irq_set.count * sizeof(int);
+
+    if (expected == size)
+        return muser_send_fds(sock, fds, muser_cmd->ioctl.data.irq_set.count);
+
+    errno = EINVAL;
+    return -1;
+}
+
+/*
+ * Sends the single file descriptor recorded in vma_info over sock and returns
+ * the result of muser_send_fds().
+ */
+static int map_dma(vma_info_t *vma_info) {
+    const int nr_fds = 1;
+
+    return muser_send_fds(sock, &vma_info->fd, nr_fds);
+}
+
+
+/*
+ * ioctl handler: forwards VFIO command cmd to the peer on sock as a
+ * MUSER_IOCTL command and hands the result back to the caller.
+ *
+ * Steps, in order:
+ *  1. Connect sock through open_sock() if it is still -1.
+ *  2. Fill muser_cmd.ioctl.data from args: a few commands take args as a plain
+ *     integer, everything else is copied from the memory args points to
+ *     (get_minsz(cmd) bytes). VFIO_GROUP_GET_DEVICE_FD is answered locally and
+ *     never reaches the peer.
+ *  3. Pre-process VFIO_DEVICE_SET_IRQS and VFIO_IOMMU_MAP_DMA; both may return
+ *     0 early without contacting the peer.
+ *  4. Send muser_cmd, followed by the per-command payload (eventfds, the DMA
+ *     region fd) or preceded by reading extra region info.
+ *  5. Read the response into muser_cmd and translate it into the return value
+ *     or copy it back into args.
+ *
+ * NOTE(review): failures are reported inconsistently (-EINVAL, -1, a raw
+ * muser_cmd.err, or a short byte count from __real_read()) -- confirm what
+ * callers expect. The semantics of get_minsz()/get_argsz() are not visible
+ * here.
+ */
+int __ioctl(struct fake_fd *fake_fd, unsigned int cmd, unsigned long args) {
+
+ int ret = 0;
+ struct muser_cmd muser_cmd = { 0 };
+ ssize_t minsz, argsz;
+ vma_info_t vma_info;
+
+ muser_cmd.type = MUSER_IOCTL;
+ muser_cmd.ioctl.vfio_cmd = cmd;
+
+ /* Connect lazily, using the state attached to this descriptor by __open(). */
+ if (sock == -1)
+ if ((sock = open_sock((struct vfio_fd*)fake_fd->priv)) == -1)
+ return sock;
+
+ /* NOTE(review): minsz is an ssize_t but is printed with %lu -- confirm. */
+ if ((minsz = get_minsz(cmd)) < 0) {
+ debug("bad minsz=%lu\n", minsz);
+ return -EINVAL;
+ }
+
+ /*
+ * Initialize muser_cmd.
+ */
+ switch (cmd) {
+ case VFIO_CHECK_EXTENSION:
+ muser_cmd.ioctl.data.vfio_extension = (int)args;
+ break;
+ case VFIO_SET_IOMMU:
+ muser_cmd.ioctl.data.iommu_type = (int)args;
+ break;
+ case VFIO_GROUP_UNSET_CONTAINER:
+ muser_cmd.ioctl.data.container_fd = (int)args;
+ break;
+ case VFIO_GROUP_GET_DEVICE_FD:
+ /* Handled locally: nothing is sent to the peer for this command. */
+ muser_cmd.ioctl.data.device_fd = (int)args;
+ return vfio_group_get_device_fd(fake_fd,
+ muser_cmd.ioctl.data.device_fd);
+ default:
+ /* Every other command passes a pointer; copy its fixed-size part. */
+ memcpy(&muser_cmd.ioctl.data, (void*)args, minsz);
+ }
+
+ switch (cmd) {
+ case VFIO_DEVICE_SET_IRQS: /* TODO this can go into switch above */
+ switch (muser_cmd.ioctl.data.irq_set.flags & VFIO_IRQ_SET_DATA_TYPE_MASK) {
+ case VFIO_IRQ_SET_DATA_EVENTFD:
+ break;
+ case VFIO_IRQ_SET_DATA_NONE:
+ case VFIO_IRQ_SET_DATA_BOOL:
+ /* Only eventfd-based requests are forwarded; these report success. */
+ debug("ignore IRQ set %#x\n",
+ muser_cmd.ioctl.data.irq_set.flags);
+ return 0;
+ default:
+ return -EINVAL;
+ }
+ break;
+ case VFIO_IOMMU_MAP_DMA:
+ /*
+ * Look up the mapping behind vaddr. The request is skipped, reporting
+ * success, when the lookup fails, the mapping has no file descriptor,
+ * or it is flagged map_pvt.
+ */
+ ret = vma_addr((void*)muser_cmd.ioctl.data.dma_map.vaddr, &vma_info);
+ if (ret != 0 || vma_info.fd == -1 || vma_info.map_pvt) {
+ debug("ignore vma for vaddr=%#llx, iova=%#llx-%#llx: %m\n",
+ muser_cmd.ioctl.data.dma_map.vaddr,
+ muser_cmd.ioctl.data.dma_map.iova,
+ muser_cmd.ioctl.data.dma_map.iova + muser_cmd.ioctl.data.dma_map.size);
+ return 0;
+ }
+ /*
+ * FIXME abuse field to communicate file offset so that
+ * we don't need to introduce a new struct.
+ */
+ muser_cmd.ioctl.data.dma_map.vaddr -= (__u64)vma_info.vma_start;
+ break;
+ }
+
+ /* NOTE(review): only -1 is checked here; a short write goes unnoticed. */
+ if ((ret = __real_write(sock, &muser_cmd, sizeof muser_cmd)) == -1) {
+ debug("failed to send command: %m\n");
+ return ret;
+ }
+
+ switch (cmd) {
+ case VFIO_DEVICE_SET_IRQS:
+ if ((argsz = get_argsz(cmd, &muser_cmd)) < 0)
+ return argsz;
+
+ /*
+ * FIXME we can also send the muser_cmd as part of the payload,
+ * instead of sending it separately at an earlier point. This
+ * would be ideal also for DMA_MAP as we only have to send one
+ * fd. For IRQs there doesn't seem to be a limit, so I'm not
+ * sure how it will be handled at the other end, e.g. will there
+ * have to be a maximum message size so we might have to split
+ * it?
+ */
+ /* The eventfds follow the fixed-size part of the caller's buffer. */
+ ret = vfio_set_data_eventfd(&muser_cmd, (int*)(args + minsz),
+ argsz - minsz);
+ if (ret == -1)
+ return ret;
+ break;
+ case VFIO_IOMMU_MAP_DMA:
+ if ((ret = map_dma(&vma_info)) == -1) {
+ debug("failed to map DMA: %m\n");
+ return ret;
+ }
+ break;
+ case VFIO_DEVICE_GET_REGION_INFO:
+ if ((argsz = get_argsz(cmd, &muser_cmd)) < 0)
+ return argsz;
+ if (argsz > minsz) {
+ /*
+ * FIXME we must now read the response from libmuser, which will
+ * be at least sizeof(struct muser_cmd) bytes. Whatever we read
+ * before those last bytes is sparse info (can be zero). Non-sparse
+ * information must be stored in the passed struct vfio_region_info
+ * (flags/cap_offset/size/offset), which is the args argument of
+ * this function.
+ *
+ * The first time the user calls VFIO_DEVICE_GET_REGION_INFO,
+ * the struct vfio_region_info is not large enough to accommodate
+ * any sparse info. If there is sparse info, it is indicated so
+ * by libmuser by setting the argsz and cap_offset fields accordingly
+ * in order to indicate the required space. Then the user calls
+ * again VFIO_DEVICE_GET_REGION_INFO with argsz sufficiently large
+ * to hold the sparse information (check function
+ * dev_get_sparse_mmap_cap).
+ *
+ * Therefore if argsz is larger than sizeof struct vfio_region_info,
+ * we need to read argsz - sizeof struct vfio_region_info bytes
+ * which will be the sparse information, plus the response (sizeof struct muser_cmd).
+ *
+ * libmuser replies first with sparse info and then with
+ * struct vfio_region_info, however the args argument
+ * contains them in the reverse order.
+ */
+ if ((ret = __real_read(sock, (void*)(args + minsz), argsz - minsz)) != argsz - minsz) {
+ debug("short read: %d/%ld\n", ret, argsz - minsz);
+ return -1;
+ }
+ }
+ break;
+ }
+ /*
+ * Read the response.
+ * NOTE(review): on a short read the byte count is returned as the ioctl
+ * result -- confirm this is intended.
+ */
+ if ((ret = __real_read(sock, &muser_cmd, sizeof muser_cmd)) != sizeof muser_cmd) {
+ return ret;
+ }
+ /* Unmapping a range the peer does not know about is treated as success. */
+ if (cmd == VFIO_IOMMU_UNMAP_DMA && muser_cmd.err == -ENOENT) {
+ muser_cmd.err = 0;
+ }
+ if (muser_cmd.err) {
+ debug("VFIO command %s failed with %d\n",
+ vfio_cmd_to_str(cmd), muser_cmd.err);
+ return muser_cmd.err;
+ }
+
+ /* Commands whose result is the return value itself, not data in args. */
+ switch (cmd) {
+ case VFIO_GET_API_VERSION:
+ return muser_cmd.ioctl.data.vfio_api_version;
+ case VFIO_CHECK_EXTENSION:
+ return muser_cmd.ioctl.data.vfio_extension;
+ case VFIO_SET_IOMMU:
+ case VFIO_DEVICE_RESET:
+ return 0;
+ }
+
+ /* Everything else gets the fixed-size part of the reply copied back. */
+ memcpy((void*)args, &muser_cmd.ioctl.data, minsz);
+
+ return 0;
+}
+
+/*
+ * stat handler: fills stat_buf with zeroes except for st_mode, which
+ * describes a regular file with the permission bits listed below. ver and
+ * filename are ignored. Always returns 0.
+ */
+int ____xstat(int ver __attribute__((unused)), const char *filename __attribute__((unused)),
+              struct stat *stat_buf) {
+
+    const mode_t perms = S_IRWXU | S_IRWXG | S_IXGRP | S_IROTH | S_IXOTH;
+
+    memset(stat_buf, 0, sizeof *stat_buf);
+    stat_buf->st_mode = S_IFREG | perms;
+
+    return 0;
+}
+
+/*
+ * 64-bit stat handler: fills stat_buf with zeroes except for st_mode, which
+ * describes a regular file with the permission bits listed below. ver and
+ * filename are ignored. Always returns 0.
+ */
+int ____xstat64(int ver __attribute__((unused)),
+                const char *filename __attribute__((unused)),
+                struct stat64 *stat_buf) {
+
+    const mode_t perms = S_IRWXU | S_IRWXG | S_IXGRP | S_IROTH | S_IXOTH;
+
+    memset(stat_buf, 0, sizeof *stat_buf);
+    stat_buf->st_mode = S_IFREG | perms;
+
+    return 0;
+}
+
+/*
+ * realpath handler: returns path unchanged, without resolving anything.
+ *
+ * If resolved_path is NULL the result is a copy obtained from strdup(), which
+ * the caller must free. Otherwise path is copied into resolved_path, which
+ * must be able to hold PATH_MAX bytes as realpath() requires.
+ *
+ * Returns the resulting string, or NULL with errno set: EINVAL when path is
+ * NULL, ENAMETOOLONG when path does not fit in a PATH_MAX buffer, or whatever
+ * strdup() reports.
+ */
+char *__realpath(const char *path, char *resolved_path) {
+    if (!path) {
+        errno = EINVAL;
+        return NULL;
+    }
+    if (!resolved_path)
+        return strdup(path);
+
+    /* Never write past the PATH_MAX bytes the caller is required to provide. */
+    if (strlen(path) >= PATH_MAX) {
+        errno = ENAMETOOLONG;
+        return NULL;
+    }
+    strcpy(resolved_path, path);
+    return resolved_path;
+}
+
+/*
+ * mmap handler for device descriptors.
+ *
+ * Sends a MUSER_MMAP request for length bytes at offset over sock, receives
+ * one file descriptor plus a struct muser_cmd response from the peer, and
+ * maps the received descriptor with __real_mmap64(). The addr hint is
+ * ignored.
+ *
+ * The received descriptor is closed before returning, on success as well as
+ * on failure: an established mapping does not need it any more, and keeping
+ * it would leak one descriptor per call.
+ *
+ * Returns the mapping, or MAP_FAILED with errno set (EINVAL when fake_fd is
+ * not a device descriptor).
+ */
+static void *__mmap64(void *addr __attribute__((unused)), size_t length, int prot, int flags,
+                      struct fake_fd *fake_fd, off_t offset) {
+
+    int ret;
+    int saved_errno;
+    void *map = MAP_FAILED;
+    struct muser_cmd muser_cmd = {
+        .type = MUSER_MMAP,
+        .mmap.request = {
+            .len = length,
+            .addr = offset
+        }
+    };
+    int fd = -1;
+
+    assert(fake_fd->priv);
+
+    if (((struct vfio_fd*)fake_fd->priv)->type != VFIO_FD_TYPE_DEVICE) {
+        errno = EINVAL;
+        return MAP_FAILED;
+    }
+
+    if ((ret = __real_write(sock, &muser_cmd, sizeof muser_cmd)) == -1) {
+        debug("failed to send command: %m\n");
+        return MAP_FAILED;
+    }
+    ret = muser_recv_fds(sock, &fd, 1);
+    if (ret != 1) {
+        debug("failed to receive device memory fd (%d): %m\n", ret);
+        return MAP_FAILED;
+    }
+
+    /* From here on fd is ours and must be released on every path. */
+    if ((ret = __real_read(sock, &muser_cmd, sizeof muser_cmd)) != sizeof muser_cmd) {
+        debug("failed to receive response (%d): %m\n", ret);
+        goto out;
+    }
+    if (muser_cmd.err) {
+        debug("command failed: %s\n", strerror(-muser_cmd.err));
+        errno = -muser_cmd.err;
+        goto out;
+    }
+    map = __real_mmap64(NULL, length, prot, flags, fd, offset);
+
+out:
+    /* Closing must not disturb the errno that describes the outcome. */
+    saved_errno = errno;
+    __real_close(fd);
+    errno = saved_errno;
+    return map;
+}
+
+/*
+ * Table of the handlers implemented in this file. Both the __xstat64 and the
+ * __lxstat64 slots are served by ____xstat64().
+ * NOTE(review): presumably picked up by libpathtrap through the "ops" symbol
+ * -- confirm.
+ */
+struct ops ops = {
+    .should_trap = __should_trap,
+    .open        = __open,
+    .close       = __close,
+    .read        = __read,
+    .write       = __write,
+    .ioctl       = __ioctl,
+    .__xstat     = ____xstat,
+    .__xstat64   = ____xstat64,
+    .__lxstat64  = ____xstat64,
+    .realpath    = __realpath,
+    .mmap64      = __mmap64
+};
+
+/* Constructor hook for the library; currently has nothing to do. */
+__attribute__((constructor)) static void ctor(void)
+{
+}
diff --git a/samples/gpio-pci-idio-16.c b/samples/gpio-pci-idio-16.c
index 285b600..bf35e49 100644
--- a/samples/gpio-pci-idio-16.c
+++ b/samples/gpio-pci-idio-16.c
@@ -36,9 +36,16 @@
#include <stdio.h>
#include <err.h>
#include <stdlib.h>
+#include <unistd.h>
#include "../lib/muser.h"
+/* Log callback of the sample: prints msg to stderr behind a "gpio: " prefix. */
+static void
+_log(void *pvt, char const *msg)
+{
+    FILE *out = stderr;
+
+    (void)pvt; /* not needed for logging */
+    fprintf(out, "gpio: %s", msg);
+}
+
ssize_t
bar2_access(void *pvt, char * const buf, size_t count, loff_t offset,
const bool is_write)
@@ -51,15 +58,34 @@ bar2_access(void *pvt, char * const buf, size_t count, loff_t offset,
return count;
}
-int main(int argc, char **argv)
+int main(int argc, char *argv[])
{
int ret;
+ bool trans_sock = false, verbose = false;
+ char opt;
+
+ while ((opt = getopt(argc, argv, "sv")) != -1) {
+ switch (opt) {
+ case 's':
+ trans_sock = true;
+ break;
+ case 'v':
+ verbose = true;
+ break;
+ default: /* '?' */
+ fprintf(stderr, "Usage: %s [-s] [-d] UUID\n", argv[0]);
+ exit(EXIT_FAILURE);
+ }
+ }
- if (argc != 2) {
+ if (optind >= argc) {
err(EXIT_FAILURE, "missing MUSER device UUID");
}
lm_dev_info_t dev_info = {
+ .trans = trans_sock ? LM_TRANS_SOCK : LM_TRANS_KERNEL,
+ .log = verbose ? _log : NULL,
+ .log_lvl = LM_DBG,
.pci_info = {
.id = {.vid = 0x494F, .did = 0x0DC8 },
.reg_info[LM_DEV_BAR2_REG_IDX] = {
@@ -69,7 +95,7 @@ int main(int argc, char **argv)
},
.irq_count[LM_DEV_INTX_IRQ_IDX] = 1,
},
- .uuid = argv[1],
+ .uuid = argv[optind],
};
ret = lm_ctx_run(&dev_info);