diff options
author | Thanos Makatos <thanos.makatos@nutanix.com> | 2020-03-24 12:06:26 -0400 |
---|---|---|
committer | Thanos Makatos <thanos.makatos@nutanix.com> | 2020-03-25 10:36:29 -0400 |
commit | 8435c007567fdd0a92ed1f4e8dd7b60bb09ae116 (patch) | |
tree | 67fcfe389226d546bbe67bac7bf32f3cadfdd922 | |
parent | 9a8dddb7ed5c5d4ac0bc1ff89af795e5fe312c86 (diff) | |
download | libvfio-user-8435c007567fdd0a92ed1f4e8dd7b60bb09ae116.zip libvfio-user-8435c007567fdd0a92ed1f4e8dd7b60bb09ae116.tar.gz libvfio-user-8435c007567fdd0a92ed1f4e8dd7b60bb09ae116.tar.bz2 |
introduce vfio-over-socket transport
Signed-off-by: Thanos Makatos <thanos.makatos@nutanix.com>
-rw-r--r-- | .gitmodules | 3 | ||||
-rw-r--r-- | CMakeLists.txt | 5 | ||||
-rw-r--r-- | kmod/muser.c | 33 | ||||
-rw-r--r-- | kmod/muser.h | 94 | ||||
-rw-r--r-- | lib/libmuser.c | 464 | ||||
-rw-r--r-- | lib/muser.h | 22 | ||||
m--------- | libpathtrap | 0 | ||||
-rw-r--r-- | libvfio/CMakeLists.txt | 35 | ||||
-rw-r--r-- | libvfio/libvfio.c | 513 | ||||
-rw-r--r-- | samples/gpio-pci-idio-16.c | 32 |
10 files changed, 1108 insertions, 93 deletions
diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..6848f53 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "libpathtrap"] + path = libpathtrap + url = git@github.com:tmakatos/libpathtrap.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 7975983..8ec6e5c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,11 +32,14 @@ cmake_minimum_required (VERSION 2.6) project(muser) include(GNUInstallDirs) -# shared library +# shared libraries add_subdirectory(lib) +add_subdirectory(libpathtrap) +add_subdirectory(libvfio) # kernel module add_subdirectory(kmod) # samples add_subdirectory(samples) + diff --git a/kmod/muser.c b/kmod/muser.c index 9e5a2c8..53cc3d8 100644 --- a/kmod/muser.c +++ b/kmod/muser.c @@ -1046,37 +1046,6 @@ err: return ret; } -static ssize_t get_minsz(unsigned int cmd) -{ - switch (cmd) { - case VFIO_DEVICE_GET_INFO: - return offsetofend(struct vfio_device_info, num_irqs); - case VFIO_DEVICE_GET_REGION_INFO: - return offsetofend(struct vfio_region_info, offset); - case VFIO_DEVICE_GET_IRQ_INFO: - return offsetofend(struct vfio_irq_info, count); - case VFIO_DEVICE_SET_IRQS: - return offsetofend(struct vfio_irq_set, count); - } - return -EOPNOTSUPP; -} - -static ssize_t get_argsz(unsigned int cmd, struct mudev_cmd *mucmd) -{ - switch (cmd) { - case VFIO_DEVICE_GET_INFO: - return mucmd->muser_cmd.ioctl.data.dev_info.argsz; - case VFIO_DEVICE_GET_REGION_INFO: - return mucmd->muser_cmd.ioctl.data.reg_info.argsz; - case VFIO_DEVICE_GET_IRQ_INFO: - return mucmd->muser_cmd.ioctl.data.irq_info.argsz; - case VFIO_DEVICE_SET_IRQS: - return mucmd->muser_cmd.ioctl.data.irq_set.argsz; - } - - return -EOPNOTSUPP; -} - static int muser_ioctl_setup_cmd(struct mudev_cmd *mucmd, unsigned int cmd, unsigned long arg) { @@ -1095,7 +1064,7 @@ static int muser_ioctl_setup_cmd(struct mudev_cmd *mucmd, unsigned int cmd, return err; /* Fetch argsz provided by caller. 
*/ - argsz = get_argsz(cmd, mucmd); + argsz = get_argsz(cmd, &mucmd->muser_cmd); if (argsz < 0) return argsz; diff --git a/kmod/muser.h b/kmod/muser.h index 65841a4..9791736 100644 --- a/kmod/muser.h +++ b/kmod/muser.h @@ -13,6 +13,14 @@ #ifndef __KERNEL__ #include <sys/types.h> +#include <stddef.h> +#include <errno.h> + +/* FIXME copied from include/linux/stddef.h, is this OK license-wise? */ +#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) +#define offsetofend(TYPE, MEMBER) \ + (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) + #endif #include <linux/ioctl.h> @@ -41,6 +49,15 @@ struct muser_cmd_ioctl { struct vfio_region_info reg_info; struct vfio_irq_info irq_info; struct vfio_irq_set irq_set; + struct vfio_group_status group_status; + int vfio_api_version; + int vfio_extension; + int container_fd; + int device_fd; + int iommu_type; + struct vfio_iommu_type1_info iommu_type1_info; + struct vfio_iommu_type1_dma_map dma_map; + struct vfio_iommu_type1_dma_unmap dma_unmap; } data; }; @@ -70,4 +87,81 @@ struct muser_cmd { #define MUSER_DEV_CMD_WAIT _IOR('M', 1, struct muser_cmd) #define MUSER_DEV_CMD_DONE _IOW('M', 2, struct muser_cmd) +static inline ssize_t get_minsz(unsigned int cmd) +{ + switch (cmd) { + case VFIO_DEVICE_GET_INFO: + return offsetofend(struct vfio_device_info, num_irqs); + case VFIO_DEVICE_GET_REGION_INFO: + return offsetofend(struct vfio_region_info, offset); + case VFIO_DEVICE_GET_IRQ_INFO: + return offsetofend(struct vfio_irq_info, count); + case VFIO_DEVICE_SET_IRQS: + return offsetofend(struct vfio_irq_set, count); + case VFIO_GROUP_GET_STATUS: + return offsetofend(struct vfio_group_status, flags); + case VFIO_GET_API_VERSION: + return 0; + case VFIO_CHECK_EXTENSION: + case VFIO_GROUP_SET_CONTAINER: + case VFIO_GROUP_UNSET_CONTAINER: + case VFIO_SET_IOMMU: + return sizeof(int); + case VFIO_IOMMU_GET_INFO: + return offsetofend(struct vfio_iommu_type1_info, iova_pgsizes); + case VFIO_IOMMU_MAP_DMA: + return 
offsetofend(struct vfio_iommu_type1_dma_map, size); + case VFIO_IOMMU_UNMAP_DMA: + return offsetofend(struct vfio_iommu_type1_dma_unmap, size); + case VFIO_GROUP_GET_DEVICE_FD: + case VFIO_DEVICE_RESET: + return 0; + } + return -EOPNOTSUPP; +} + +static inline ssize_t get_argsz(unsigned int cmd, struct muser_cmd *muser_cmd) +{ + switch (cmd) { + case VFIO_DEVICE_GET_INFO: + return muser_cmd->ioctl.data.dev_info.argsz; + case VFIO_DEVICE_GET_REGION_INFO: + return muser_cmd->ioctl.data.reg_info.argsz; + case VFIO_DEVICE_GET_IRQ_INFO: + return muser_cmd->ioctl.data.irq_info.argsz; + case VFIO_DEVICE_SET_IRQS: + return muser_cmd->ioctl.data.irq_set.argsz; + } + + return -EOPNOTSUPP; +} + +static inline const char* vfio_cmd_to_str(int cmd) { + switch (cmd) { + case VFIO_GET_API_VERSION: return "VFIO_GET_API_VERSION"; + case VFIO_CHECK_EXTENSION: return "VFIO_CHECK_EXTENSION"; + case VFIO_SET_IOMMU: return "VFIO_SET_IOMMU"; + case VFIO_GROUP_GET_STATUS: return "VFIO_GROUP_GET_STATUS"; + case VFIO_GROUP_SET_CONTAINER: return "VFIO_GROUP_SET_CONTAINER"; + case VFIO_GROUP_UNSET_CONTAINER: return "VFIO_GROUP_UNSET_CONTAINER"; + case VFIO_GROUP_GET_DEVICE_FD: return "VFIO_GROUP_GET_DEVICE_FD"; + case VFIO_DEVICE_GET_INFO: return "VFIO_DEVICE_GET_INFO"; + case VFIO_DEVICE_GET_REGION_INFO: return "VFIO_DEVICE_GET_REGION_INFO"; + case VFIO_DEVICE_GET_IRQ_INFO: return "VFIO_DEVICE_GET_IRQ_INFO"; + case VFIO_DEVICE_SET_IRQS: return "VFIO_DEVICE_SET_IRQS"; + case VFIO_DEVICE_RESET: return "VFIO_DEVICE_RESET"; + case VFIO_IOMMU_GET_INFO: return "VFIO_IOMMU_GET_INFO/VFIO_DEVICE_GET_PCI_HOT_RESET_INFO/VFIO_IOMMU_SPAPR_TCE_GET_INFO"; + case VFIO_IOMMU_MAP_DMA: return "VFIO_IOMMU_MAP_DMA/VFIO_DEVICE_PCI_HOT_RESET"; + case VFIO_IOMMU_UNMAP_DMA: return "VFIO_IOMMU_UNMAP_DMA"; + case VFIO_IOMMU_ENABLE: return "VFIO_IOMMU_ENABLE"; + case VFIO_IOMMU_DISABLE: return "VFIO_IOMMU_DISABLE"; + case VFIO_EEH_PE_OP: return "VFIO_EEH_PE_OP"; + case VFIO_IOMMU_SPAPR_REGISTER_MEMORY: return 
"VFIO_IOMMU_SPAPR_REGISTER_MEMORY"; + case VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY: return "VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY"; + case VFIO_IOMMU_SPAPR_TCE_CREATE: return "VFIO_IOMMU_SPAPR_TCE_CREATE"; + case VFIO_IOMMU_SPAPR_TCE_REMOVE: return "VFIO_IOMMU_SPAPR_TCE_REMOVE"; + } + return NULL; +} + #endif /* _UAPI_LINUX_MUSER_H */ diff --git a/lib/libmuser.c b/lib/libmuser.c index e72efba..ff46ed9 100644 --- a/lib/libmuser.c +++ b/lib/libmuser.c @@ -47,6 +47,11 @@ #include <stdarg.h> #include <linux/vfio.h> #include <sys/param.h> +#include <sys/un.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <time.h> #include "../kmod/muser.h" #include "muser.h" @@ -54,6 +59,8 @@ #include "dma.h" #include "cap.h" +#define IOMMU_GRP_NAME "iommu_group" + typedef enum { IRQ_NONE = 0, IRQ_INTX, @@ -77,6 +84,160 @@ typedef struct { _Static_assert(sizeof(s) - offsetof(s, m) == sizeof(t), \ #t " " #m " must be last member in " #s) +struct lm_ctx { + void *pvt; + dma_controller_t *dma; + int fd; + int (*reset) (void *pvt); + lm_log_lvl_t log_lvl; + lm_log_fn_t *log; + lm_pci_info_t pci_info; + lm_pci_config_space_t *pci_config_space; + lm_trans_t trans; + struct caps *caps; + + /* LM_TRANS_SOCK */ + char *iommu_dir; + int iommu_dir_fd; + + lm_irqs_t irqs; /* XXX must be last */ +} __attribute__((packed)); /* FIXME packed required to make below macro work */ +MUST_BE_LAST(struct lm_ctx, irqs, lm_irqs_t); + + +/* function prototypes */ +static int +muser_dma_map(lm_ctx_t*, struct muser_cmd*); + +static int +muser_dma_unmap(lm_ctx_t*, struct muser_cmd*); + +static void +free_sparse_mmap_areas(lm_reg_info_t*); + +static int +dev_detach(int dev_fd) +{ + return close(dev_fd); +} + +static int +dev_attach(lm_ctx_t *lm_ctx __attribute__((unused)), const char *uuid) +{ + char *path; + int dev_fd; + int err; + + err = asprintf(&path, "/dev/" MUSER_DEVNODE "/%s", uuid); + if (err != (int)(strlen(MUSER_DEVNODE) + strlen(uuid) + 6)) { + return -1; + } + + dev_fd = 
open(path, O_RDWR); + + free(path); + + return dev_fd; +} + +static int +get_request_kernel(int fd, struct muser_cmd *cmd) +{ + return ioctl(fd, MUSER_DEV_CMD_WAIT, &cmd); +} + +static int +send_response_kernel(int fd, struct muser_cmd *cmd) +{ + return ioctl(fd, MUSER_DEV_CMD_DONE, &cmd); +} + +static int +open_sock(lm_ctx_t *lm_ctx, const char *uuid) +{ + struct sockaddr_un addr = { .sun_family = AF_UNIX }; + int ret, fd; + unsigned long iommu_grp; + char *endptr; + + assert(lm_ctx != NULL); + assert(uuid != NULL); + + /* + * FIXME simplify by creating everything under a temporary directory and + * then atomically rename + */ + + iommu_grp = strtoul(uuid, &endptr, 10); + if (*endptr != '\0' || (iommu_grp == ULONG_MAX && errno == ERANGE)) { + errno = EINVAL; + return -1; + } + + if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) { + return fd; + } + + /* create /dev/vfio */ + if (mkdir(VFIO_DIR, 0755) == -1 && errno != EEXIST) { + return -1; + } + + /* create /dev/vfio/<IOMMU group> */ + if ((ret = asprintf(&lm_ctx->iommu_dir, VFIO_DIR "%lu", iommu_grp)) == -1) { + return -1; + } + if (mkdir(lm_ctx->iommu_dir, 0755) == -1) { + return -1; + } + + if ((lm_ctx->iommu_dir_fd = open(lm_ctx->iommu_dir, O_DIRECTORY)) == -1) { + return -1; + } + + /* crealte symlink /dev/vfio/<IOMMU group>/iommu_group -> ../<IOMMU group> */ + if ((ret = symlinkat(lm_ctx->iommu_dir, lm_ctx->iommu_dir_fd, IOMMU_GRP_NAME)) == -1) { + return -1; + } + + /* create control socket */ + if ((ret = openat(lm_ctx->iommu_dir_fd, MUSER_SOCK, O_WRONLY | O_CREAT)) == -1) { + return -1; + } + ret = snprintf(addr.sun_path, sizeof addr.sun_path, "%s/" MUSER_SOCK, lm_ctx->iommu_dir); + if (ret >= (int)sizeof addr.sun_path) { + errno = ENAMETOOLONG; + return -1; + } + if (ret < 0) { + return ret; + } + + /* start listening business */ + if ((ret = unlink(addr.sun_path)) == -1 && errno != ENOENT) { + return -1; + } + if ((ret = bind(fd, (struct sockaddr*)&addr, sizeof(addr))) == -1) { + return ret; + } + if 
((ret = listen(fd, 0)) == -1) { + return ret; + } + return accept(fd, NULL, NULL); +} + +static int +get_request_sock(int fd, struct muser_cmd *cmd) +{ + return read(fd, cmd, sizeof *cmd); +} + +static int +send_response_sock(int fd, struct muser_cmd *cmd) +{ + return write(fd, cmd, sizeof *cmd); +} + static void get_path_from_fd(int fd, char *buf) { @@ -97,19 +258,37 @@ get_path_from_fd(int fd, char *buf) buf[ret] = '\0'; } -struct lm_ctx { - void *pvt; - dma_controller_t *dma; - int fd; - int (*reset) (void *pvt); - lm_log_lvl_t log_lvl; - lm_log_fn_t *log; - lm_pci_info_t pci_info; - lm_pci_config_space_t *pci_config_space; - struct caps *caps; - lm_irqs_t irqs; /* XXX must be last */ +ssize_t recv_fds_sock(int fd, void *buf, size_t size) +{ + ssize_t ret = muser_recv_fds(fd, buf, size / sizeof(int)); + if (ret < 0) { + return ret; + } + return ret * sizeof(int); +} + +static struct transport_ops { + int (*attach)(lm_ctx_t*, const char*); + int(*detach)(int fd); + int (*get_request)(int fd, struct muser_cmd*); + int (*send_response)(int fd, struct muser_cmd*); + ssize_t (*recv_fds)(int fd, void *buf, size_t size); +} transports_ops[] = { + [LM_TRANS_KERNEL] = { + .attach = dev_attach, + .detach = dev_detach, + .recv_fds = read, + .get_request = get_request_kernel, + .send_response = send_response_kernel + }, + [LM_TRANS_SOCK] = { + .attach = open_sock, + .detach = close, + .recv_fds = recv_fds_sock, + .get_request = get_request_sock, + .send_response = send_response_sock + } }; -MUST_BE_LAST(struct lm_ctx, irqs, lm_irqs_t); #define LM2VFIO_IRQT(type) (type - 1) @@ -558,7 +737,56 @@ do_muser_ioctl(lm_ctx_t *lm_ctx, struct muser_cmd_ioctl *cmd_ioctl, void *data) return lm_ctx->reset(lm_ctx->pvt); } lm_log(lm_ctx, LM_DBG, "reset called but not reset function present\n"); + err = 0; + break; + case VFIO_GROUP_GET_STATUS: + cmd_ioctl->data.group_status.flags = VFIO_GROUP_FLAGS_VIABLE; + err = 0; + break; + case VFIO_GET_API_VERSION: + cmd_ioctl->data.vfio_api_version 
= VFIO_API_VERSION; + err = 0; break; + case VFIO_CHECK_EXTENSION: + if (cmd_ioctl->data.vfio_extension == VFIO_TYPE1v2_IOMMU) { + err = 0; + } + break; + case VFIO_IOMMU_GET_INFO: + cmd_ioctl->data.iommu_type1_info.flags = VFIO_IOMMU_INFO_PGSIZES; + cmd_ioctl->data.iommu_type1_info.iova_pgsizes = sysconf(_SC_PAGESIZE); + err = 0; + break; + case VFIO_IOMMU_MAP_DMA: + { + struct muser_cmd muser_cmd = { + .type = MUSER_DMA_MMAP, + .mmap.request.fd = *((int*)data), + .mmap.request.addr = cmd_ioctl->data.dma_map.iova, + .mmap.request.len = cmd_ioctl->data.dma_map.size, + .mmap.request.offset = cmd_ioctl->data.dma_map.vaddr + }; + err = muser_dma_map(lm_ctx, &muser_cmd); + } + break; + case VFIO_IOMMU_UNMAP_DMA: + { + struct muser_cmd muser_cmd = { + .type = MUSER_DMA_MUNMAP, + .mmap.request.addr = cmd_ioctl->data.dma_unmap.iova, + .mmap.request.len = cmd_ioctl->data.dma_unmap.size + }; + err = muser_dma_unmap(lm_ctx, &muser_cmd); + } + break; + /* FIXME */ + case VFIO_GROUP_SET_CONTAINER: + case VFIO_GROUP_UNSET_CONTAINER: + case VFIO_SET_IOMMU: + err = 0; + break; + default: + lm_log(lm_ctx, LM_ERR, "bad comamnd %d", cmd_ioctl->vfio_cmd); } return err; @@ -618,13 +846,98 @@ muser_dma_map(lm_ctx_t *lm_ctx, struct muser_cmd *cmd) lm_log(lm_ctx, LM_ERR, "failed to add DMA region fd=%d path=%s %#lx-%#lx: %d\n", cmd->mmap.request.fd, buf, cmd->mmap.request.addr, cmd->mmap.request.addr + cmd->mmap.request.len, err); + } else { + err = 0; } - return 0; + return err; +} + +int +muser_send_fds(int sock, int *fds, size_t count) { + struct msghdr msg = { 0 }; + size_t size = count * sizeof *fds; + char buf[CMSG_SPACE(size)]; + memset(buf, '\0', sizeof(buf)); + + /* XXX requires at least one byte */ + struct iovec io = { .iov_base = "\0", .iov_len = 1 }; + + msg.msg_iov = &io; + msg.msg_iovlen = 1; + msg.msg_control = buf; + msg.msg_controllen = sizeof(buf); + + struct cmsghdr * cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + 
cmsg->cmsg_len = CMSG_LEN(size); + memcpy(CMSG_DATA(cmsg), fds, size); + msg.msg_controllen = CMSG_SPACE(size); + return sendmsg(sock, &msg, 0); +} + +ssize_t +muser_recv_fds(int sock, int *fds, size_t count) +{ + int ret; + struct cmsghdr *cmsg; + size_t fds_size; + char msg_buf[sysconf(_SC_PAGESIZE)]; + struct iovec io = {.iov_base = msg_buf, .iov_len = sizeof(msg_buf)}; + char cmsg_buf[sysconf(_SC_PAGESIZE)]; + struct msghdr msg = { + .msg_iov = &io, + .msg_iovlen = 1, + .msg_control = cmsg_buf, + .msg_controllen = sizeof(cmsg_buf) + }; + + if (fds == NULL || count <= 0) { + errno = EINVAL; + return -1; + } + + ret = recvmsg(sock, &msg, 0); + if (ret == -1) { + return ret; + } + + cmsg = CMSG_FIRSTHDR(&msg); + if (cmsg == NULL) { + errno = EINVAL; + return -1; + } + fds_size = cmsg->cmsg_len - sizeof *cmsg; + if ((fds_size % sizeof(int)) != 0 || fds_size / sizeof (int) > count) { + errno = EINVAL; + return -1; + } + memcpy((void*)fds, CMSG_DATA(cmsg), cmsg->cmsg_len - sizeof *cmsg); + + return fds_size / sizeof(int); } /* * Callback that is executed when device memory is to be mmap'd. + * + * TODO vfio-over-socket: each PCI region can be sparsely memory mapped, so + * there can be multiple mapped regions per PCI region. We need to make these + * mapped regions persistent. One way would be to store each sparse region as + * an individual file named after the memory range, e.g. + * /dev/shm/muser/<UUID>/<region>/<offset>-<length> (the <region> can be <bar0>, + * <rom> etc.). + * + * Another way would be to create one file per PCI region and then + * tell libvfio which offset of each file corresponds to each region. The + * mapping between sparse regions and file offsets can be 1:1, so there can be + * large gaps in file which should be fine since it will be sparsely allocated. + * Alternatively, each sparse region can be put right next to each other so + * we'll need some kind of translation. 
+ * + * However this functionality is implemented, it must be provided by libmuser. + * For now we don't do anything (except for receiving the file descriptors) + * and leave it to the device implementation to handle. */ static int muser_mmap(lm_ctx_t *lm_ctx, struct muser_cmd *cmd) @@ -655,6 +968,16 @@ muser_mmap(lm_ctx_t *lm_ctx, struct muser_cmd *cmd) } cmd->mmap.response = addr; + /* FIXME */ + if (lm_ctx->trans == LM_TRANS_SOCK) { + err = muser_send_fds(lm_ctx->fd, (int*)&addr, 1); + if (err == -1) { + lm_log(lm_ctx, LM_ERR, "failed to send fd=%d: %d, %m\n", + *((int*)&addr), err); + } + err = 0; + } + out: if (err != 0) { lm_log(lm_ctx, LM_ERR, "failed to mmap device memory %#x-%#lx: %s\n", @@ -837,10 +1160,8 @@ muser_access(lm_ctx_t *lm_ctx, struct muser_cmd *cmd, bool is_write) return -1; } -#ifndef LM_TERSE_LOGGING - lm_log(lm_ctx, LM_DBG, "%s %x@%lx\n", is_write ? "W" : "R", cmd->rw.count, - cmd->rw.pos); -#endif + lm_log(lm_ctx, LM_DBG, "%s %#lx-%#lx\n", is_write ? "W" : "R", cmd->rw.pos, + cmd->rw.pos + cmd->rw.count); /* copy data to be written from kernel to user space */ if (is_write) { @@ -898,10 +1219,12 @@ muser_ioctl(lm_ctx_t *lm_ctx, struct muser_cmd *cmd) void *data = NULL; size_t size = 0; int ret; + uint32_t flags; /* TODO make this a function that returns the size */ - if (cmd->ioctl.vfio_cmd == VFIO_DEVICE_SET_IRQS) { - uint32_t flags = cmd->ioctl.data.irq_set.flags; + switch (cmd->ioctl.vfio_cmd) { + case VFIO_DEVICE_SET_IRQS: + flags = cmd->ioctl.data.irq_set.flags; switch ((flags & VFIO_IRQ_SET_DATA_TYPE_MASK)) { case VFIO_IRQ_SET_DATA_EVENTFD: size = sizeof(int32_t) * cmd->ioctl.data.irq_set.count; @@ -910,24 +1233,28 @@ muser_ioctl(lm_ctx_t *lm_ctx, struct muser_cmd *cmd) size = sizeof(uint8_t) * cmd->ioctl.data.irq_set.count; break; } + break; + case VFIO_IOMMU_MAP_DMA: + size = sizeof(int); + break; } if (size != 0) { - data = calloc(1, size); + data = calloc(1, size); /* TODO use alloca */ if (data == NULL) { #ifdef DEBUG 
perror("calloc"); #endif return -1; } - - ret = read(lm_ctx->fd, data, size); + ret = transports_ops[lm_ctx->trans].recv_fds(lm_ctx->fd, data, size); if (ret < 0) { -#ifdef DEBUG - perror("read failed"); -#endif goto out; } + if (ret != (int)size) { + lm_log(lm_ctx, LM_ERR, "short read for fds\n"); + return -EINVAL; + } } ret = (int)do_muser_ioctl(lm_ctx, &cmd->ioctl, data); @@ -945,10 +1272,15 @@ drive_loop(lm_ctx_t *lm_ctx) int err; do { - err = ioctl(lm_ctx->fd, MUSER_DEV_CMD_WAIT, &cmd); + err = transports_ops[lm_ctx->trans].get_request(lm_ctx->fd, &cmd); if (err < 0) { + lm_log(lm_ctx, LM_ERR, "failed to receive request: %m\n"); return err; } + if (err == 0) { + lm_log(lm_ctx, LM_INF, "end of file: %m\n"); + break; + } switch (cmd.type) { case MUSER_IOCTL: @@ -969,10 +1301,14 @@ drive_loop(lm_ctx_t *lm_ctx) break; default: lm_log(lm_ctx, LM_ERR, "bad command %d\n", cmd.type); - continue; + /* + * TODO should respond with something here instead of ignoring the + * command. + */ + err = -EINVAL; } cmd.err = err; - err = ioctl(lm_ctx->fd, MUSER_DEV_CMD_DONE, &cmd); + err = transports_ops[lm_ctx->trans].send_response(lm_ctx->fd, &cmd); if (err < 0) { lm_log(lm_ctx, LM_ERR, "failed to complete command: %s\n", strerror(errno)); @@ -994,31 +1330,7 @@ lm_ctx_drive(lm_ctx_t *lm_ctx) return drive_loop(lm_ctx); } -static int -dev_detach(int dev_fd) -{ - return close(dev_fd); -} - -static int -dev_attach(const char *uuid) -{ - char *path; - int dev_fd; - int err; - - err = asprintf(&path, "/dev/" MUSER_DEVNODE "/%s", uuid); - if (err != (int)(strlen(MUSER_DEVNODE) + strlen(uuid) + 6)) { - return -1; - } - - dev_fd = open(path, O_RDWR); - - free(path); - - return dev_fd; -} - +/* FIXME this is not enough anymore, check muser_mmap */ void * lm_mmap(lm_ctx_t *lm_ctx, off_t offset, size_t length) { @@ -1076,11 +1388,40 @@ lm_ctx_destroy(lm_ctx_t *lm_ctx) return; } + /* + * FIXME The following cleanup can be dangerous depending on how lm_ctx_destroy + * is called since it 
might delete files it did not create. Improve by + * acquiring a lock on the directory. + */ + if (lm_ctx->trans == LM_TRANS_SOCK) { + int ret; + + if (lm_ctx->iommu_dir_fd != -1) { + if ((ret = unlinkat(lm_ctx->iommu_dir_fd, IOMMU_GRP_NAME, 0)) == -1 && errno != ENOENT) { + lm_log(lm_ctx, LM_DBG, "failed to remove " IOMMU_GRP_NAME ": %m\n"); + } + if ((ret = unlinkat(lm_ctx->iommu_dir_fd, MUSER_SOCK, 0)) == -1 && errno != ENOENT) { + lm_log(lm_ctx, LM_DBG, "failed to remove " MUSER_SOCK ": %m\n"); + } + if (close(lm_ctx->iommu_dir_fd) == -1) { + lm_log(lm_ctx, LM_DBG, "failed to close IOMMU dir fd %d: %m\n", + lm_ctx->iommu_dir_fd); + } + } + if (lm_ctx->iommu_dir != NULL) { + if ((ret = rmdir(lm_ctx->iommu_dir)) == -1 && errno != ENOENT) { + lm_log(lm_ctx, LM_DBG, "failed to remove %s: %m\n", lm_ctx->iommu_dir); + } + free(lm_ctx->iommu_dir); + } + } + free(lm_ctx->pci_config_space); - dev_detach(lm_ctx->fd); + transports_ops[lm_ctx->trans].detach(lm_ctx->fd); if (lm_ctx->dma != NULL) { dma_controller_destroy(lm_ctx, lm_ctx->dma); } + free_sparse_mmap_areas(lm_ctx->pci_info.reg_info); free(lm_ctx); // FIXME: Maybe close any open irq efds? Unmap stuff? } @@ -1203,6 +1544,11 @@ lm_ctx_create(const lm_dev_info_t *dev_info) return NULL; } + if (dev_info->trans < 0 || dev_info->trans >= LM_TRANS_MAX) { + errno = EINVAL; + return NULL; + } + /* * FIXME need to check that the number of MSI and MSI-X IRQs are valid * (1, 2, 4, 8, 16 or 32 for MSI and up to 2048 for MSI-X). @@ -1221,6 +1567,9 @@ lm_ctx_create(const lm_dev_info_t *dev_info) if (lm_ctx == NULL) { return NULL; } + lm_ctx->trans = dev_info->trans; + + lm_ctx->iommu_dir_fd = -1; // Set context irq information. for (i = 0; i < max_ivs; i++) { @@ -1254,9 +1603,12 @@ lm_ctx_create(const lm_dev_info_t *dev_info) } // Attach to the muser control device. 
- lm_ctx->fd = dev_attach(dev_info->uuid); + lm_ctx->fd = transports_ops[dev_info->trans].attach(lm_ctx, dev_info->uuid); if (lm_ctx->fd == -1) { err = errno; + if (errno != EINTR) { + lm_log(lm_ctx, LM_ERR, "failed to attach: %m\n"); + } goto out; } @@ -1271,7 +1623,7 @@ out: if (err) { if (lm_ctx) { dma_controller_destroy(lm_ctx, lm_ctx->dma); - dev_detach(lm_ctx->fd); + transports_ops[dev_info->trans].detach(lm_ctx->fd); free_sparse_mmap_areas(lm_ctx->pci_info.reg_info); free(lm_ctx->pci_config_space); free(lm_ctx); diff --git a/lib/muser.h b/lib/muser.h index f3330fe..3f3a9fa 100644 --- a/lib/muser.h +++ b/lib/muser.h @@ -50,6 +50,12 @@ #define LM_TERSE_LOGGING 0 #endif +#define VFIO_NAME "vfio" +#define VFIO_DIR "/dev/" VFIO_NAME "/" +#define VFIO_CONTAINER VFIO_DIR "/" VFIO_NAME + +#define MUSER_SOCK "cntrl" + typedef uint64_t dma_addr_t; typedef struct { @@ -149,7 +155,9 @@ enum { LM_DEV_INTX_IRQ_IDX, LM_DEV_MSI_IRQ_IDX, LM_DEV_MSIX_IRQ_IDX, - LM_DEV_NUM_IRQS = 3 + LM_DEV_ERR_IRQ_INDEX, + LM_DEV_REQ_IRQ_INDEX, + LM_DEV_NUM_IRQS }; enum { @@ -247,6 +255,12 @@ typedef struct { lm_cap_access_t *fn; } lm_cap_t; +typedef enum { + LM_TRANS_KERNEL, + LM_TRANS_SOCK, + LM_TRANS_MAX +} lm_trans_t; + #define LM_MAX_CAPS (PCI_CFG_SPACE_SIZE - PCI_STD_HEADER_SIZEOF) / PCI_CAP_SIZEOF /** @@ -297,6 +311,8 @@ typedef struct { * Number of capabilities in above array. 
*/ int nr_caps; + + lm_trans_t trans; } lm_dev_info_t; /** @@ -436,6 +452,10 @@ lm_get_region(loff_t pos, size_t count, loff_t *off); uint8_t * lm_get_pci_non_std_config_space(lm_ctx_t *lm_ctx); +/* FIXME */ +int muser_send_fds(int sock, int *fds, size_t count); +ssize_t muser_recv_fds(int sock, int *fds, size_t count); + #endif /* LIB_MUSER_H */ /* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/libpathtrap b/libpathtrap new file mode 160000 +Subproject 7a3a8242c9b31c39b26f4e2cf7f07659d7e272f diff --git a/libvfio/CMakeLists.txt b/libvfio/CMakeLists.txt new file mode 100644 index 0000000..b6af4c9 --- /dev/null +++ b/libvfio/CMakeLists.txt @@ -0,0 +1,35 @@ +# +# Copyright (c) 2019 Nutanix Inc. All rights reserved. +# +# Authors: Thanos Makatos <thanos@nutanix.com> +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of Nutanix nor the names of its contributors may be +# used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY +# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +add_library(vfio SHARED + ../libpathtrap/libpathtrap.h + ../libpathtrap/vma_addr.h + libvfio.c +) +target_link_libraries(vfio muser dl ${CMAKE_BINARY_DIR}/libpathtrap/libpathtrap.a) +set(CMAKE_C_FLAGS "-Wall -Wextra -Werror -ldl") diff --git a/libvfio/libvfio.c b/libvfio/libvfio.c new file mode 100644 index 0000000..2487546 --- /dev/null +++ b/libvfio/libvfio.c @@ -0,0 +1,513 @@ +/* + * Copyright (c) 2020 Nutanix Inc. All rights reserved. + * + * Authors: Thanos Makatos <thanos@nutanix.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Nutanix nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + */ + +#define _GNU_SOURCE +#include <stddef.h> +#include <assert.h> +#include <errno.h> +#include <linux/vfio.h> +#include <string.h> +#include <stdio.h> +#include <linux/muser.h> +#include <muser/muser.h> +#include <muser/pci.h> +#include <sys/socket.h> +#include <stdlib.h> +#include <sys/syscall.h> +#include <sys/un.h> +#include <signal.h> +#include <sys/mman.h> +#include <dlfcn.h> + +#include "../libpathtrap/libpathtrap.h" +#include "../libpathtrap/vma_addr.h" + +#ifdef DEBUG +#define debug(fmt, ...) \ + do { \ + fprintf(stderr, "%s:%d " fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } while (0) +#else +#define debug(fmt, ...) 
+#endif
+
+#define VFIO_NAME "vfio"
+#define VFIO_DIR "/dev/" VFIO_NAME "/"
+/* VFIO_DIR already ends with '/', so no extra separator is needed. */
+#define VFIO_CONTAINER VFIO_DIR VFIO_NAME
+
+/* Socket connected to the device emulation process, -1 until connected. */
+static int sock = -1;
+
+enum vfio_fd_type {
+    VFIO_FD_TYPE_CONTAINER,
+    VFIO_FD_TYPE_GROUP,
+    VFIO_FD_TYPE_DEVICE
+};
+
+struct vfio_fd {
+    enum vfio_fd_type type;
+    unsigned long iommu_grp; /* VFIO_FD_TYPE_GROUP only */
+};
+
+/*
+ * Creates the fake file descriptor backing a trapped path.
+ *
+ * Paths under VFIO_DIR become either the container (VFIO_DIR VFIO_NAME) or a
+ * group (VFIO_DIR followed by a decimal IOMMU group number). Any other path is
+ * only accepted as a device when priv points to a bool that is true.
+ *
+ * Returns the new file descriptor, or -1 with errno set.
+ */
+int __open(struct fake_fd *fake_fd, const char *pathname,
+           int flags __attribute__((unused)), void *priv) {
+
+    int fd = -1;
+    int err = 0;
+    struct vfio_fd *vfio_fd = calloc(1, sizeof *vfio_fd);
+
+    if (!vfio_fd)
+        return -1;
+
+    if ((fd = syscall(SYS_memfd_create, pathname, 0)) == -1) {
+        err = errno;
+        goto out;
+    }
+
+    if (!strncmp(pathname, VFIO_DIR, sizeof VFIO_DIR - 1)) {
+        const char *grp = pathname + sizeof VFIO_DIR - 1;
+
+        if (!strcmp(grp, VFIO_NAME)) {
+            vfio_fd->type = VFIO_FD_TYPE_CONTAINER;
+            debug("container fd=%d\n", fd);
+        } else {
+            char *endptr;
+
+            /*
+             * strtoul() accepts an empty string, leading white space and a
+             * sign, none of which form a valid group number, so insist on a
+             * leading digit. errno must be cleared first, otherwise a stale
+             * ERANGE could be mistaken for an overflow.
+             */
+            if (*grp < '0' || *grp > '9') {
+                err = EINVAL;
+                goto out;
+            }
+            errno = 0;
+            vfio_fd->iommu_grp = strtoul(grp, &endptr, 10);
+            if (*endptr != '\0' || (vfio_fd->iommu_grp == ULONG_MAX && errno == ERANGE)) {
+                err = EINVAL;
+                goto out;
+            }
+            vfio_fd->type = VFIO_FD_TYPE_GROUP;
+            debug("group fd=%d\n", fd);
+        }
+    } else {
+        /* A device is only valid when the caller explicitly flags it. */
+        if (!priv || *(bool*)priv != true) {
+            debug("bad path %s\n", pathname);
+            err = EINVAL;
+            goto out;
+        }
+        debug("device fd=%d\n", fd);
+        vfio_fd->type = VFIO_FD_TYPE_DEVICE;
+    }
+    fake_fd_set_priv(fake_fd, (void*)vfio_fd);
+out:
+    if (err) {
+        if (fd != -1)
+            close(fd);
+        free(vfio_fd);
+        errno = err;
+        return -1;
+    }
+    return fd;
+}
+
+/*
+ * Closes the descriptor backing the fake fd.
+ *
+ * NOTE(review): the struct vfio_fd installed by __open() is not freed here;
+ * confirm that the owner of struct fake_fd releases priv.
+ */
+int __close(struct fake_fd *fake_fd) {
+    return __real_close(fake_fd->fd);
+}
+
+/* Returns true if pathname lives under VFIO_DIR and must be intercepted. */
+bool __should_trap(const char *pathname) {
+    /*
+     * FIXME should only trap /dev/vfio/vfio and /dev/vfio/[0-9]+ in order
+     * to allow real VFIO devices to work, however since we already
+     * trap /dev/vfio/vfio those devices cannot work in the first place.
+     */
+    return 0 == strncmp(pathname, VFIO_DIR, sizeof VFIO_DIR -1);
+}
+
+/*
+ * Converts the result of an incomplete socket transfer into the -1/errno
+ * convention: a failed call keeps its errno, a short transfer becomes EIO.
+ */
+static ssize_t short_xfer(ssize_t ret) {
+    if (ret >= 0)
+        errno = EIO;
+    return -1;
+}
+
+/*
+ * Reads count bytes at *offset from the device by sending a MUSER_READ
+ * command over the socket, then receiving the data followed by the response.
+ *
+ * Returns the number of bytes read, or -1 with errno set.
+ */
+ssize_t __read(struct fake_fd *fake_fd __attribute__((unused)),
+               void *buf, size_t count, off_t *offset) {
+
+    struct muser_cmd muser_cmd = {
+        .type = MUSER_READ,
+        .rw = {
+            .count = count,
+            .pos = *offset
+        }
+    };
+    ssize_t ret;
+
+#if 0
+    debug("R fd=%d %s %#lx-%#lx\n",
+          fake_fd->fd, fake_fd->pathname, *offset, *offset + count);
+#endif
+
+    /*
+     * FIXME need to set fd (which is the device fd) in muser_cmd
+     * so that it knows for which device it is.
+     */
+    if ((ret = __real_write(sock, &muser_cmd, sizeof muser_cmd)) != (ssize_t)sizeof muser_cmd) {
+        debug("failed to send command: %m\n");
+        return short_xfer(ret);
+    }
+    if ((ret = __real_read(sock, buf, count)) != (ssize_t)count) {
+        debug("failed to read data: %m\n");
+        return short_xfer(ret);
+    }
+    if ((ret = __real_read(sock, &muser_cmd, sizeof muser_cmd)) != (ssize_t)sizeof muser_cmd) {
+        debug("failed to read response: %m\n");
+        return short_xfer(ret);
+    }
+    if (muser_cmd.err != (int)count) {
+        debug("bad response: %d\n", muser_cmd.err);
+        /* Report a negative error the same way __write() does. */
+        if (muser_cmd.err < 0) {
+            errno = -muser_cmd.err;
+            return -1;
+        }
+        return muser_cmd.err;
+    }
+    return count;
+}
+
+/*
+ * Writes count bytes at *offset to the device by sending a MUSER_WRITE
+ * command followed by the data, then receiving the response.
+ *
+ * Returns count, or -1 with errno set.
+ */
+ssize_t __write(struct fake_fd *fake_fd __attribute__((unused)),
+                const void *buf, size_t count, off_t *offset) {
+
+    struct muser_cmd muser_cmd = {
+        .type = MUSER_WRITE,
+        .rw = {
+            .count = count,
+            .pos = *offset
+        }
+    };
+    ssize_t ret;
+
+#if 0
+    debug("W fd=%d %s %#lx-%#lx\n",
+          fake_fd->fd, fake_fd->pathname, *offset, *offset + count);
+#endif
+
+    /*
+     * FIXME need to set fd (which is the device fd) in muser_cmd
+     * so that it knows for which device it is.
+     */
+    if ((ret = __real_write(sock, &muser_cmd, sizeof muser_cmd)) != (ssize_t)sizeof muser_cmd) {
+        debug("failed to send command (%zd): %m\n", ret);
+        return short_xfer(ret);
+    }
+    if ((ret = __real_write(sock, buf, count)) != (ssize_t)count) {
+        debug("failed to send data (%zd): %m\n", ret);
+        return short_xfer(ret);
+    }
+    if ((ret = __real_read(sock, &muser_cmd, sizeof muser_cmd)) != (ssize_t)sizeof muser_cmd) {
+        debug("failed to receive response (%zd): %m\n", ret);
+        return short_xfer(ret);
+    }
+    if (muser_cmd.err) {
+        debug("command failed: %s\n", strerror(-muser_cmd.err));
+        errno = -muser_cmd.err;
+        return -1;
+    }
+    return count;
+}
+
+/*
+ * Connects the global socket to VFIO_DIR "<iommu_grp>/" MUSER_SOCK.
+ *
+ * The global is only updated once the connection is established, so a failed
+ * attempt neither leaks the descriptor nor leaves an unconnected socket
+ * behind that later calls would mistake for a working one.
+ *
+ * Returns the connected socket, or -1 with errno set.
+ */
+static int open_sock(struct vfio_fd *vfio_fd) {
+
+    int ret;
+    int fd;
+    struct sockaddr_un addr = {.sun_family = AF_UNIX};
+
+    assert(vfio_fd);
+
+    ret = snprintf(addr.sun_path, sizeof addr.sun_path,
+                   VFIO_DIR "%lu/" MUSER_SOCK, vfio_fd->iommu_grp);
+    if (ret < 0 || (size_t)ret >= sizeof addr.sun_path) {
+        errno = ENAMETOOLONG;
+        return -1;
+    }
+
+    if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
+        debug("failed to open socket: %m\n");
+        return -1;
+    }
+
+    if (connect(fd, (struct sockaddr*)&addr, sizeof(addr)) == -1) {
+        int err = errno;
+
+        debug("failed to connect to %s: %m\n", addr.sun_path);
+        __real_close(fd);
+        errno = err;
+        return -1;
+    }
+
+    sock = fd;
+    return sock;
+}
+
+/* Opens a fake device fd; both arguments are currently ignored. */
+static int vfio_group_get_device_fd(struct fake_fd *fake_fd __attribute__((unused)),
+                                    unsigned long args __attribute__((unused))) {
+    /* FIXME need to generate name based on passed fake_fd
+     * FIXME need to associate the fd we return with the passed fake_fd
+     */
+    bool flag = true;
+    return open_fake("device_fd", 0, &flag);
+}
+
+/*
+ * Sends the eventfds of a VFIO_DEVICE_SET_IRQS request over the socket.
+ * size is the length in bytes of the fds array and must match irq_set.count.
+ */
+static int vfio_set_data_eventfd(struct muser_cmd *muser_cmd, int *fds, size_t size) {
+    if (muser_cmd->ioctl.data.irq_set.count * sizeof(int) != size) {
+        errno = EINVAL;
+        return -1;
+    }
+    return muser_send_fds(sock, fds, muser_cmd->ioctl.data.irq_set.count);
+}
+
+/* Sends the file descriptor backing a DMA mapping over the socket. */
+static int map_dma(vma_info_t *vma_info) {
+    return muser_send_fds(sock, &vma_info->fd, 1);
+}
+
+
+/*
+ * Forwards a VFIO ioctl over the socket, connecting it on first use.
+ *
+ * The command is sent first, followed by any command specific payload
+ * (eventfds for SET_IRQS, the backing fd for MAP_DMA). The response is then
+ * read back and copied into the caller's argument.
+ *
+ * NOTE(review): failures are reported either as -1 or as a negative errno
+ * value depending on the path; confirm which convention callers expect.
+ */
+int __ioctl(struct fake_fd *fake_fd, unsigned int cmd, unsigned long args) {
+
+    int ret = 0;
+    struct muser_cmd muser_cmd = { 0 };
+    ssize_t minsz, argsz;
+    vma_info_t vma_info;
+
+    muser_cmd.type = MUSER_IOCTL;
+    muser_cmd.ioctl.vfio_cmd = cmd;
+
+    if (sock == -1)
+        if ((sock = open_sock((struct vfio_fd*)fake_fd->priv)) == -1)
+            return sock;
+
+    if ((minsz = get_minsz(cmd)) < 0) {
+        debug("bad minsz=%zd\n", minsz);
+        return -EINVAL;
+    }
+
+    /*
+     * Initialize muser_cmd.
+     */
+    switch (cmd) {
+        case VFIO_CHECK_EXTENSION:
+            muser_cmd.ioctl.data.vfio_extension = (int)args;
+            break;
+        case VFIO_SET_IOMMU:
+            muser_cmd.ioctl.data.iommu_type = (int)args;
+            break;
+        case VFIO_GROUP_UNSET_CONTAINER:
+            muser_cmd.ioctl.data.container_fd = (int)args;
+            break;
+        case VFIO_GROUP_GET_DEVICE_FD:
+            muser_cmd.ioctl.data.device_fd = (int)args;
+            return vfio_group_get_device_fd(fake_fd,
+                                            muser_cmd.ioctl.data.device_fd);
+        default:
+            /* args is not necessarily a pointer when there is nothing to copy. */
+            if (minsz > 0)
+                memcpy(&muser_cmd.ioctl.data, (void*)args, minsz);
+    }
+
+    switch (cmd) {
+        case VFIO_DEVICE_SET_IRQS: /* TODO this can go into switch above */
+            switch (muser_cmd.ioctl.data.irq_set.flags & VFIO_IRQ_SET_DATA_TYPE_MASK) {
+                case VFIO_IRQ_SET_DATA_EVENTFD:
+                    break;
+                case VFIO_IRQ_SET_DATA_NONE:
+                case VFIO_IRQ_SET_DATA_BOOL:
+                    debug("ignore IRQ set %#x\n",
+                          muser_cmd.ioctl.data.irq_set.flags);
+                    return 0;
+                default:
+                    return -EINVAL;
+            }
+            break;
+        case VFIO_IOMMU_MAP_DMA:
+            ret = vma_addr((void*)muser_cmd.ioctl.data.dma_map.vaddr, &vma_info);
+            if (ret != 0 || vma_info.fd == -1 || vma_info.map_pvt) {
+                debug("ignore vma for vaddr=%#llx, iova=%#llx-%#llx: %m\n",
+                      muser_cmd.ioctl.data.dma_map.vaddr,
+                      muser_cmd.ioctl.data.dma_map.iova,
+                      muser_cmd.ioctl.data.dma_map.iova + muser_cmd.ioctl.data.dma_map.size);
+                return 0;
+            }
+            /*
+             * FIXME abuse field to communicate file offset so that
+             * we don't need to introduce a new struct.
+             */
+            muser_cmd.ioctl.data.dma_map.vaddr -= (__u64)vma_info.vma_start;
+            break;
+    }
+
+    if ((ret = __real_write(sock, &muser_cmd, sizeof muser_cmd)) == -1) {
+        debug("failed to send command: %m\n");
+        return ret;
+    }
+
+    switch (cmd) {
+        case VFIO_DEVICE_SET_IRQS:
+            if ((argsz = get_argsz(cmd, &muser_cmd)) < 0)
+                return argsz;
+
+            /*
+             * FIXME we can also send the muser_cmd as part of the payload,
+             * instead of sending it separately at an earlier point. This
+             * would be ideal also for DMA_MAP as we only have to send one
+             * fd. For IRQs there doesn't seem to be a limit, so I'm not
+             * sure how it will be handled at the other end, e.g. will there
+             * have to be a maximum message size so we might have to split
+             * it?
+             */
+            ret = vfio_set_data_eventfd(&muser_cmd, (int*)(args + minsz),
+                                        argsz - minsz);
+            if (ret == -1)
+                return ret;
+            break;
+        case VFIO_IOMMU_MAP_DMA:
+            if ((ret = map_dma(&vma_info)) == -1) {
+                debug("failed to map DMA: %m\n");
+                return ret;
+            }
+            break;
+        case VFIO_DEVICE_GET_REGION_INFO:
+            if ((argsz = get_argsz(cmd, &muser_cmd)) < 0)
+                return argsz;
+            if (argsz > minsz) {
+                /*
+                 * FIXME we must now read the response from libmuser, which will
+                 * be at least sizeof(struct muser_cmd) bytes. Whatever we read
+                 * before those last bytes is sparse info (can be zero). Non-sparse
+                 * information must be stored in the passed struct vfio_region_info
+                 * (flags/cap_offset/size/offset), which is the args argument of
+                 * this function.
+                 *
+                 * The first time the user calls VFIO_DEVICE_GET_REGION_INFO,
+                 * the struct vfio_region_info is not large enough to accommodate
+                 * any sparse info. If there is sparse info, it is indicated so
+                 * by libmuser by setting the argsz and cap_offset fields accordingly
+                 * in order to indicate the required space. Then the user calls
+                 * again VFIO_DEVICE_GET_REGION_INFO with argsz sufficiently large
+                 * to hold the sparse information (check function
+                 * dev_get_sparse_mmap_cap).
+                 *
+                 * Therefore if argsz is larger than sizeof struct vfio_region_info,
+                 * we need to read argsz - sizeof struct vfio_region_info bytes
+                 * which will be the sparse information, plus the response (sizeof struct muser_cmd).
+                 *
+                 * libmuser replies first with sparse info and then with
+                 * struct vfio_region_info, however the args argument
+                 * contains them in the reverse order.
+                 */
+                if ((ret = __real_read(sock, (void*)(args + minsz), argsz - minsz)) != argsz - minsz) {
+                    debug("short read: %d/%ld\n", ret, argsz - minsz);
+                    return -1;
+                }
+            }
+            break;
+    }
+    if ((ret = __real_read(sock, &muser_cmd, sizeof muser_cmd)) != sizeof muser_cmd) {
+        /* A short read must not be returned as a positive (successful) value. */
+        return short_xfer(ret);
+    }
+    /* Unmapping a range that was never mapped is not treated as an error. */
+    if (cmd == VFIO_IOMMU_UNMAP_DMA && muser_cmd.err == -ENOENT) {
+        muser_cmd.err = 0;
+    }
+    if (muser_cmd.err) {
+        debug("VFIO command %s failed with %d\n",
+              vfio_cmd_to_str(cmd), muser_cmd.err);
+        return muser_cmd.err;
+    }
+
+    switch (cmd) {
+        case VFIO_GET_API_VERSION:
+            return muser_cmd.ioctl.data.vfio_api_version;
+        case VFIO_CHECK_EXTENSION:
+            return muser_cmd.ioctl.data.vfio_extension;
+        case VFIO_SET_IOMMU:
+        case VFIO_DEVICE_RESET:
+            return 0;
+    }
+
+    if (minsz > 0)
+        memcpy((void*)args, &muser_cmd.ioctl.data, minsz);
+
+    return 0;
+}
+
+/* Reports every trapped path as a regular file; ver and filename are ignored. */
+int ____xstat(int ver __attribute__((unused)), const char *filename __attribute__((unused)),
+              struct stat *stat_buf) {
+
+    memset(stat_buf, 0, sizeof *stat_buf);
+    stat_buf->st_mode = S_IFREG | S_IRWXU | S_IRWXG | S_IXGRP | S_IROTH | S_IXOTH;
+    return 0;
+}
+
+/* 64-bit variant of ____xstat(); also installed as the __lxstat64 handler. */
+int ____xstat64(int ver __attribute__((unused)),
+                const char *filename __attribute__((unused)),
+                struct stat64 *stat_buf) {
+
+    memset(stat_buf, 0, sizeof *stat_buf);
+    stat_buf->st_mode = S_IFREG | S_IRWXU | S_IRWXG | S_IXGRP | S_IROTH | S_IXOTH;
+    return 0;
+}
+
+/*
+ * Returns path unchanged: copied into resolved_path when one is supplied,
+ * otherwise into a newly allocated string (NULL if the allocation fails).
+ */
+char *__realpath(const char *path, char *resolved_path) {
+    if (!resolved_path)
+        resolved_path = strdup(path);
+    else
+        strcpy(resolved_path, path);
+    return resolved_path;
+}
+
+/*
+ * Maps device memory: sends a MUSER_MMAP command, receives a file descriptor
+ * for the memory over the socket and maps that descriptor.
+ *
+ * The received descriptor is closed on every path. The mapping does not need
+ * it to stay open, and it would otherwise leak on each call.
+ *
+ * Returns the mapped address, or MAP_FAILED.
+ */
+static void *__mmap64(void *addr __attribute__((unused)), size_t length, int prot, int flags,
+                      struct fake_fd *fake_fd, off_t offset) {
+
+    int ret;
+    int err;
+    void *map;
+    struct muser_cmd muser_cmd = {
+        .type = MUSER_MMAP,
+        .mmap.request = {
+            .len = length,
+            .addr = offset
+        }
+    };
+    int fd;
+
+    assert(fake_fd->priv);
+
+    if (((struct vfio_fd*)fake_fd->priv)->type != VFIO_FD_TYPE_DEVICE) {
+        errno = EINVAL;
+        return MAP_FAILED;
+    }
+
+    if ((ret = __real_write(sock, &muser_cmd, sizeof muser_cmd)) == -1) {
+        debug("failed to send command: %m\n");
+        return MAP_FAILED;
+    }
+    ret = muser_recv_fds(sock, &fd, 1);
+    if (ret != 1) {
+        debug("failed to receive device memory fd (%d): %m\n", ret);
+        return MAP_FAILED;
+    }
+    if ((ret = __real_read(sock, &muser_cmd, sizeof muser_cmd)) != sizeof muser_cmd) {
+        debug("failed to receive response (%d): %m\n", ret);
+        __real_close(fd);
+        return MAP_FAILED;
+    }
+    if (muser_cmd.err) {
+        debug("command failed: %s\n", strerror(-muser_cmd.err));
+        __real_close(fd);
+        errno = -muser_cmd.err;
+        return MAP_FAILED;
+    }
+    map = __real_mmap64(NULL, length, prot, flags, fd, offset);
+    /* Preserve the errno of a failed mmap across close(). */
+    err = errno;
+    __real_close(fd);
+    errno = err;
+    return map;
+}
+
+/* Handlers for the paths and file descriptors trapped by this library. */
+struct ops ops = {
+    .should_trap = &__should_trap,
+    .open = &__open,
+    .close = &__close,
+    .read = &__read,
+    .write = &__write,
+    .ioctl = &__ioctl,
+    .__xstat = &____xstat,
+    .__xstat64 = &____xstat64,
+    .__lxstat64 = &____xstat64,
+    .realpath = &__realpath,
+    .mmap64 = &__mmap64
+};
+
+/* Library constructor; intentionally empty. */
+__attribute__((constructor)) static void ctor(void)
+{
+}
diff --git a/samples/gpio-pci-idio-16.c b/samples/gpio-pci-idio-16.c
index 285b600..bf35e49 100644
--- a/samples/gpio-pci-idio-16.c
+++ b/samples/gpio-pci-idio-16.c
@@ -36,9 +36,16 @@
 #include <stdio.h>
 #include <err.h>
 #include <stdlib.h>
+#include <unistd.h>
 
 #include "../lib/muser.h"
 
+static void
+_log(void *pvt, char const *msg)
+{
+    fprintf(stderr, "gpio: %s", msg);
+}
+
 ssize_t
 bar2_access(void *pvt, char * const buf, size_t count, loff_t offset,
             const bool is_write)
@@ -51,15 +58,34 @@ bar2_access(void *pvt, char * const buf, size_t count, loff_t offset,
     return count;
 }
 
-int main(int argc, char **argv)
+int main(int argc, char *argv[])
 {
     int ret;
+    bool trans_sock = false, verbose = false;
+    int opt; /* getopt() returns int; a plain char may never compare equal to -1 */
+
+    while ((opt = getopt(argc, argv, "sv")) != -1) {
+        switch (opt) {
+            case 's':
+                trans_sock = true;
+                break;
+            case 'v':
+                verbose = true;
+                break;
+            default: /* '?' */
+                fprintf(stderr, "Usage: %s [-s] [-v] UUID\n", argv[0]);
+                exit(EXIT_FAILURE);
+        }
+    }
 
-    if (argc != 2) {
+    if (optind >= argc) {
         err(EXIT_FAILURE, "missing MUSER device UUID");
     }
 
     lm_dev_info_t dev_info = {
+        .trans = trans_sock ? LM_TRANS_SOCK : LM_TRANS_KERNEL,
+        .log = verbose ? _log : NULL,
+        .log_lvl = LM_DBG,
         .pci_info = {
             .id = {.vid = 0x494F, .did = 0x0DC8 },
             .reg_info[LM_DEV_BAR2_REG_IDX] = {
@@ -69,7 +95,7 @@ int main(int argc, char **argv)
             },
             .irq_count[LM_DEV_INTX_IRQ_IDX] = 1,
         },
-        .uuid = argv[1],
+        .uuid = argv[optind],
     };
 
     ret = lm_ctx_run(&dev_info);
|