diff options
-rw-r--r-- | include/libvfio-user.h | 59 | ||||
-rw-r--r-- | lib/libvfio-user.c | 83 | ||||
-rw-r--r-- | lib/private.h | 11 | ||||
-rw-r--r-- | lib/tran_sock.c | 129 | ||||
-rw-r--r-- | lib/tran_sock.h | 39 | ||||
-rw-r--r-- | samples/client.c | 55 | ||||
-rw-r--r-- | samples/gpio-pci-idio-16.c | 2 | ||||
-rw-r--r-- | samples/server.c | 19 | ||||
-rw-r--r-- | test/mocks.c | 11 | ||||
-rw-r--r-- | test/unit-tests.c | 77 |
10 files changed, 340 insertions, 145 deletions
diff --git a/include/libvfio-user.h b/include/libvfio-user.h index 342f3e5..d5a8c6a 100644 --- a/include/libvfio-user.h +++ b/include/libvfio-user.h @@ -188,28 +188,6 @@ void * vfu_mmap(vfu_ctx_t * vfu_ctx, off_t offset, size_t length); /** - * Prototype for memory access callback. The program MUST first map device - * memory in its own virtual address space using vfu_mmap, do any additional work - * required, and finally return that memory. When a region is memory mapped, - * libvfio-user calls the previously registered callback with the following - * arguments: - * - * @pvt: private pointer - * @off: offset of memory area being memory mapped - * @len: length of memory area being memory mapped - * - * @returns the memory address returned by vfu_mmap, or MAP_FAILED on failure - */ -typedef unsigned long (vfu_map_region_cb_t) (void *pvt, unsigned long off, - unsigned long len); - -#define VFU_REGION_FLAG_READ (1 << 0) -#define VFU_REGION_FLAG_WRITE (1 << 1) -#define VFU_REGION_FLAG_MMAP (1 << 2) // TODO: how this relates to IO bar? -#define VFU_REGION_FLAG_RW (VFU_REGION_FLAG_READ | VFU_REGION_FLAG_WRITE) -#define VFU_REGION_FLAG_MEM (1 << 3) // if unset, bar is IO - -/** * Prototype for region access callback. When a region is accessed, libvfio-user * calls the previously registered callback with the following arguments: * @@ -224,6 +202,12 @@ typedef unsigned long (vfu_map_region_cb_t) (void *pvt, unsigned long off, typedef ssize_t (vfu_region_access_cb_t) (void *pvt, char *buf, size_t count, loff_t offset, bool is_write); +#define VFU_REGION_FLAG_READ (1 << 0) +#define VFU_REGION_FLAG_WRITE (1 << 1) +#define VFU_REGION_FLAG_MMAP (1 << 2) // TODO: how this relates to IO bar? +#define VFU_REGION_FLAG_RW (VFU_REGION_FLAG_READ | VFU_REGION_FLAG_WRITE) +#define VFU_REGION_FLAG_MEM (1 << 3) // if unset, bar is IO + /** * Set up a region. 
* @@ -236,11 +220,30 @@ typedef ssize_t (vfu_region_access_cb_t) (void *pvt, char *buf, size_t count, * @vfu_ctx: the libvfio-user context * @region_idx: region index * @size: size of the region - * @region_access: callback function to access region - * @flags: region flags - * @mmap_areas: array of memory mappable areas - * @nr_mmap_areas: size of mmap_areas - * @map: callback function to map region + * @region_access: callback function to access region. If the region is memory + * mappable and the client accesses the region or part of sparse area, then + * the callback is not called. + * @flags: region flags (VFU_REGION_FLAG_*) + * @mmap_areas: array of memory mappable areas. This array provides to the + * server greater control of which specific areas should be memory mapped by + * the client. Each element in the @mmap_areas array describes one such area. + * Ignored if @nr_mmap_areas is 0 or if the region is not memory mappable. + * @nr_mmap_areas: number of sparse areas in @mmap_areas. Must be 0 if the + * region is not memory mappable. + * @fd: file descriptor of the file backing the region if it's a mappable + * region. It is the server's responsibility to create a file suitable for + * memory mapping by the client. Ignored if the region is not memory mappable. + * + * A note on memory-mappable regions: the client can memory map any part of the + * file descriptor, even if not supposed to do so according to @mmap_areas. + * There is no way in Linux to avoid this. + * + * TODO maybe we should introduce per-sparse region file descriptors so that + * the client cannot possibly memory map areas it's not supposed to. Even if + * the client needs to have region under the same backing file, it is possible + * to create linear device-mapper targets, one for each area, and provide file + * descriptors of these DM targets. This is something we can document and + * demonstrate in a sample. * * @returns 0 on success, -1 on error, Sets errno. 
*/ @@ -248,7 +251,7 @@ int vfu_setup_region(vfu_ctx_t *vfu_ctx, int region_idx, size_t size, vfu_region_access_cb_t *region_access, int flags, struct iovec *mmap_areas, uint32_t nr_mmap_areas, - vfu_map_region_cb_t *map); + int fd); /* * Callback function that is called when the guest resets the device. diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c index fab68d5..846a70f 100644 --- a/lib/libvfio-user.c +++ b/lib/libvfio-user.c @@ -107,9 +107,9 @@ get_vfio_caps_size(bool is_migr_reg, struct vfu_sparse_mmap_areas *m) * Sparse mmap information stays after struct vfio_region_info and cap_offest * points accordingly. */ -static void +static int dev_get_caps(vfu_ctx_t *vfu_ctx, vfu_reg_info_t *vfu_reg, bool is_migr_reg, - struct vfio_region_info *vfio_reg) + struct vfio_region_info *vfio_reg, int **fds, size_t *nr_fds) { struct vfio_info_cap_header *header; struct vfio_region_info_cap_type *type = NULL; @@ -118,6 +118,8 @@ dev_get_caps(vfu_ctx_t *vfu_ctx, vfu_reg_info_t *vfu_reg, bool is_migr_reg, assert(vfu_ctx != NULL); assert(vfio_reg != NULL); + assert(fds != NULL); + assert(nr_fds != NULL); header = (struct vfio_info_cap_header*)(vfio_reg + 1); @@ -140,13 +142,25 @@ dev_get_caps(vfu_ctx_t *vfu_ctx, vfu_reg_info_t *vfu_reg, bool is_migr_reg, vfio_reg->cap_offset = sizeof(struct vfio_region_info); sparse = (struct vfio_region_info_cap_sparse_mmap*)header; } + + /* + * FIXME need to figure out how to break message into smaller messages + * so that we don't exceed client_max_fds + */ + assert(nr_mmap_areas <= vfu_ctx->client_max_fds); + + *fds = malloc(nr_mmap_areas * sizeof(int)); + if (*fds == NULL) { + return -ENOMEM; + } sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP; sparse->header.version = 1; sparse->header.next = 0; - sparse->nr_areas = nr_mmap_areas; + sparse->nr_areas = *nr_fds = nr_mmap_areas; mmap_areas = vfu_reg->mmap_areas; for (i = 0; i < nr_mmap_areas; i++) { + (*fds)[i] = vfu_reg->fd; sparse->areas[i].offset = 
(__u64)mmap_areas->areas[i].iov_base; sparse->areas[i].size = mmap_areas->areas[i].iov_len; vfu_log(vfu_ctx, LOG_DEBUG, "%s: area %d %#llx-%#llx", __func__, @@ -154,6 +168,7 @@ dev_get_caps(vfu_ctx_t *vfu_ctx, vfu_reg_info_t *vfu_reg, bool is_migr_reg, sparse->areas[i].offset + sparse->areas[i].size); } } + return 0; } #define VFU_REGION_SHIFT 40 @@ -207,7 +222,7 @@ is_migr_reg(vfu_ctx_t *vfu_ctx, int index) long dev_get_reginfo(vfu_ctx_t *vfu_ctx, uint32_t index, uint32_t argsz, - struct vfio_region_info **vfio_reg) + struct vfio_region_info **vfio_reg, int **fds, size_t *nr_fds) { vfu_reg_info_t *vfu_reg; size_t caps_size; @@ -243,12 +258,14 @@ dev_get_reginfo(vfu_ctx_t *vfu_ctx, uint32_t index, uint32_t argsz, (*vfio_reg)->offset = region_to_offset((*vfio_reg)->index); (*vfio_reg)->size = vfu_reg->size; + *nr_fds = 0; if (caps_size > 0) { if (vfu_reg->mmap_areas != NULL) { (*vfio_reg)->flags |= VFIO_REGION_INFO_FLAG_CAPS; } if (argsz >= (*vfio_reg)->argsz) { - dev_get_caps(vfu_ctx, vfu_reg, is_migr_reg(vfu_ctx, index), *vfio_reg); + dev_get_caps(vfu_ctx, vfu_reg, is_migr_reg(vfu_ctx, index), + *vfio_reg, fds, nr_fds); } } @@ -482,14 +499,15 @@ vfu_access(vfu_ctx_t *vfu_ctx, bool is_write, char *rwbuf, uint32_t count, static int handle_device_get_region_info(vfu_ctx_t *vfu_ctx, uint32_t size, struct vfio_region_info *reg_info_in, - struct vfio_region_info **reg_info_out) + struct vfio_region_info **reg_info_out, + int **fds, size_t *nr_fds) { if (size < sizeof(*reg_info_in)) { return -EINVAL; } return dev_get_reginfo(vfu_ctx, reg_info_in->index, reg_info_in->argsz, - reg_info_out); + reg_info_out, fds, nr_fds); } int @@ -879,7 +897,7 @@ UNIT_TEST_SYMBOL(get_next_command); int exec_command(vfu_ctx_t *vfu_ctx, struct vfio_user_header *hdr, size_t size, - int *fds, size_t nr_fds, + int *fds, size_t nr_fds, int **fds_out, size_t *nr_fds_out, struct iovec *_iovecs, struct iovec **iovecs, size_t *nr_iovecs, bool *free_iovec_data) { @@ -958,7 +976,8 @@ 
exec_command(vfu_ctx_t *vfu_ctx, struct vfio_user_header *hdr, size_t size, break; case VFIO_USER_DEVICE_GET_REGION_INFO: ret = handle_device_get_region_info(vfu_ctx, hdr->msg_size, cmd_data, - &dev_reg_info); + &dev_reg_info, fds_out, + nr_fds_out); if (ret == 0) { _iovecs[1].iov_base = dev_reg_info; _iovecs[1].iov_len = hdr->msg_size; @@ -1024,8 +1043,9 @@ process_request(vfu_ctx_t *vfu_ctx) { struct vfio_user_header hdr = { 0, }; int ret; - int *fds = NULL; + int *fds = NULL, *fds_out = NULL; size_t nr_fds, i; + size_t nr_fds_out = 0; struct iovec _iovecs[2] = { { 0, } }; struct iovec *iovecs = NULL; size_t nr_iovecs = 0; @@ -1054,8 +1074,8 @@ process_request(vfu_ctx_t *vfu_ctx) return ret; } - ret = exec_command(vfu_ctx, &hdr, ret, fds, nr_fds, _iovecs, &iovecs, - &nr_iovecs, &free_iovec_data); + ret = exec_command(vfu_ctx, &hdr, ret, fds, nr_fds, &fds_out, &nr_fds_out, + _iovecs, &iovecs, &nr_iovecs, &free_iovec_data); for (i = 0; i < nr_fds; i++) { if (fds[i] != -1) { @@ -1081,7 +1101,7 @@ process_request(vfu_ctx_t *vfu_ctx) if (!(hdr.flags.no_reply)) { // FIXME: SPEC: should the reply include the command? I'd say yes? 
ret = vfu_send_iovec(vfu_ctx->conn_fd, hdr.msg_id, true, - 0, iovecs, nr_iovecs, NULL, 0, -ret); + 0, iovecs, nr_iovecs, fds_out, nr_fds_out, -ret); if (unlikely(ret < 0)) { vfu_log(vfu_ctx, LOG_ERR, "failed to complete command: %s", strerror(-ret)); @@ -1465,11 +1485,37 @@ copy_sparse_mmap_areas(vfu_reg_info_t *reg_info, return 0; } +static int +setup_sparse_areas(vfu_reg_info_t *r, struct iovec *mmap_areas, + uint32_t nr_mmap_areas, int fd) +{ + int ret, i; + + assert(r != NULL); + + if (fd == -1) { + return -EBADF; + } + r->fd = fd; + ret = copy_sparse_mmap_areas(r, mmap_areas, nr_mmap_areas); + if (ret < 0) { + return ret; + } + for (i = 0; i < r->mmap_areas->nr_mmap_areas; i++) { + struct iovec *a = &r->mmap_areas->areas[i]; + if ((unsigned long long)a->iov_base + a->iov_len > r->size) { + free(r->mmap_areas); + return -EINVAL; + } + } + return 0; +} + int vfu_setup_region(vfu_ctx_t *vfu_ctx, int region_idx, size_t size, vfu_region_access_cb_t *region_access, int flags, struct iovec *mmap_areas, uint32_t nr_mmap_areas, - vfu_map_region_cb_t *map) + int fd) { int ret; @@ -1487,12 +1533,9 @@ vfu_setup_region(vfu_ctx_t *vfu_ctx, int region_idx, size_t size, vfu_ctx->reg_info[region_idx].size = size; vfu_ctx->reg_info[region_idx].fn = region_access; - if (map != NULL) { - vfu_ctx->reg_info[region_idx].map = map; - } - if (mmap_areas) { - ret = copy_sparse_mmap_areas(&vfu_ctx->reg_info[region_idx], - mmap_areas, nr_mmap_areas); + if (nr_mmap_areas > 0) { + ret = setup_sparse_areas(&vfu_ctx->reg_info[region_idx], mmap_areas, + nr_mmap_areas, fd); if (ret < 0) { return ERROR(-ret); } diff --git a/lib/private.h b/lib/private.h index 7860ec2..968271d 100644 --- a/lib/private.h +++ b/lib/private.h @@ -91,12 +91,8 @@ typedef struct { */ vfu_region_access_cb_t *fn; - /* - * Callback function that is called when the region is memory mapped. - * Required if VFU_REGION_FLAG_MEM is set, otherwise ignored. 
- */ - vfu_map_region_cb_t *map; struct vfu_sparse_mmap_areas *mmap_areas; /* sparse mmap areas */ + int fd; } vfu_reg_info_t; struct pci_dev { @@ -158,7 +154,7 @@ get_next_command(vfu_ctx_t *vfu_ctx, struct vfio_user_header *hdr, int *fds, int exec_command(vfu_ctx_t *vfu_ctx, struct vfio_user_header *hdr, size_t size, - int *fds, size_t nr_fds, + int *fds, size_t nr_fds, int **fds_out, size_t *nr_fds_out, struct iovec *_iovecs, struct iovec **iovecs, size_t *nr_iovecs, bool *free_iovec_data); @@ -171,9 +167,10 @@ consume_fd(int *fds, size_t nr_fds, size_t index); int handle_device_get_info(vfu_ctx_t *vfu_ctx, uint32_t size, struct vfio_device_info *dev_info); + long dev_get_reginfo(vfu_ctx_t *vfu_ctx, uint32_t index, uint32_t argsz, - struct vfio_region_info **vfio_reg); + struct vfio_region_info **vfio_reg, int **fds, size_t *nr_fds); #endif /* LIB_VFIO_USER_PRIVATE_H */ diff --git a/lib/tran_sock.c b/lib/tran_sock.c index f96784d..dd682db 100644 --- a/lib/tran_sock.c +++ b/lib/tran_sock.c @@ -214,6 +214,52 @@ vfu_send_error(int sock, uint16_t msg_id, return vfu_send_iovec(sock, msg_id, true, cmd, NULL, 0, NULL, 0, error); } +static int +get_msg(void *data, size_t len, int *fds, size_t *nr_fds, int sock_fd, + int sock_flags) +{ + int ret; + struct iovec iov = {.iov_base = data, .iov_len = len}; + struct msghdr msg = {.msg_iov = &iov, .msg_iovlen = 1}; + struct cmsghdr *cmsg; + + if (nr_fds != NULL && *nr_fds > 0) { + assert(fds != NULL); + msg.msg_controllen = CMSG_SPACE(sizeof(int) * *nr_fds); + msg.msg_control = alloca(msg.msg_controllen); + *nr_fds = 0; + } + + ret = recvmsg(sock_fd, &msg, sock_flags); + if (ret == -1) { + return -errno; + } + + if (msg.msg_flags & MSG_CTRUNC || msg.msg_flags & MSG_TRUNC) { + return -EFAULT; + } + + if (nr_fds != NULL) { + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; cmsg = CMSG_NXTHDR(&msg, cmsg)) { + if (cmsg->cmsg_level != SOL_SOCKET || cmsg->cmsg_type != SCM_RIGHTS) { + continue; + } + if (cmsg->cmsg_len < 
CMSG_LEN(sizeof(int))) { + return -EINVAL; + } + int size = cmsg->cmsg_len - CMSG_LEN(0); + if (size % sizeof(int) != 0) { + return -EINVAL; + } + *nr_fds = (int)(size / sizeof(int)); + memcpy(fds, CMSG_DATA(cmsg), *nr_fds * sizeof(int)); + break; + } + } + + return ret; +} + /* * Receive a vfio-user message. If "len" is set to non-zero, the message should * include data of that length, which is stored in the pre-allocated "data" @@ -224,14 +270,15 @@ vfu_send_error(int sock, uint16_t msg_id, * better. */ int -vfu_recv(int sock, struct vfio_user_header *hdr, bool is_reply, - uint16_t *msg_id, void *data, size_t *len) +vfu_recv_fds(int sock, struct vfio_user_header *hdr, bool is_reply, + uint16_t *msg_id, void *data, size_t *len, int *fds, + size_t *nr_fds) { int ret; /* FIXME if ret == -1 then fcntl can overwrite recv's errno */ - ret = recv_blocking(sock, hdr, sizeof(*hdr), 0); + ret = get_msg(hdr, sizeof *hdr, fds, nr_fds, sock, 0); if (ret == -1) { return -errno; } @@ -275,6 +322,13 @@ vfu_recv(int sock, struct vfio_user_header *hdr, bool is_reply, return 0; } +int +vfu_recv(int sock, struct vfio_user_header *hdr, bool is_reply, + uint16_t *msg_id, void *data, size_t *len) +{ + return vfu_recv_fds(sock, hdr, is_reply, msg_id, data, len, NULL, NULL); +} + /* * Like vfu_recv(), but will automatically allocate reply data. 
* @@ -333,26 +387,29 @@ vfu_recv_alloc(int sock, struct vfio_user_header *hdr, bool is_reply, int vfu_msg_iovec(int sock, uint16_t msg_id, enum vfio_user_command cmd, struct iovec *iovecs, size_t nr_iovecs, - int *send_fds, size_t fd_count, + int *send_fds, size_t send_fd_count, struct vfio_user_header *hdr, - void *recv_data, size_t recv_len) + void *recv_data, size_t recv_len, + int *recv_fds, size_t *recv_fd_count) { int ret = vfu_send_iovec(sock, msg_id, false, cmd, iovecs, nr_iovecs, - send_fds, fd_count, 0); + send_fds, send_fd_count, 0); if (ret < 0) { return ret; } if (hdr == NULL) { hdr = alloca(sizeof *hdr); } - return vfu_recv(sock, hdr, true, &msg_id, recv_data, &recv_len); + return vfu_recv_fds(sock, hdr, true, &msg_id, recv_data, &recv_len, + recv_fds, recv_fd_count); } int -vfu_msg(int sock, uint16_t msg_id, enum vfio_user_command cmd, - void *send_data, size_t send_len, - struct vfio_user_header *hdr, - void *recv_data, size_t recv_len) +vfu_msg_fds(int sock, uint16_t msg_id, enum vfio_user_command cmd, + void *send_data, size_t send_len, + struct vfio_user_header *hdr, + void *recv_data, size_t recv_len, int *recv_fds, + size_t *recv_fd_count) { /* [0] is for the header. 
*/ struct iovec iovecs[2] = { @@ -362,7 +419,18 @@ vfu_msg(int sock, uint16_t msg_id, enum vfio_user_command cmd, } }; return vfu_msg_iovec(sock, msg_id, cmd, iovecs, ARRAY_SIZE(iovecs), - NULL, 0, hdr, recv_data, recv_len); + NULL, 0, hdr, recv_data, recv_len, recv_fds, + recv_fd_count); +} + +int +vfu_msg(int sock, uint16_t msg_id, enum vfio_user_command cmd, + void *send_data, size_t send_len, + struct vfio_user_header *hdr, + void *recv_data, size_t recv_len) +{ + return vfu_msg_fds(sock, msg_id, cmd, send_data, send_len, hdr, recv_data, + recv_len, NULL, NULL); } /* @@ -650,15 +718,7 @@ static int get_request_sock(vfu_ctx_t *vfu_ctx, struct vfio_user_header *hdr, int *fds, size_t *nr_fds) { - int ret, sock_flags = 0; - struct iovec iov = {.iov_base = hdr, .iov_len = sizeof *hdr}; - struct msghdr msg = {.msg_iov = &iov, .msg_iovlen = 1}; - struct cmsghdr *cmsg; - - msg.msg_controllen = CMSG_SPACE(sizeof(int) * *nr_fds); - msg.msg_control = alloca(msg.msg_controllen); - - *nr_fds = 0; + int sock_flags = 0; /* * TODO ideally we should set O_NONBLOCK on the fd so that the syscall is @@ -668,32 +728,7 @@ get_request_sock(vfu_ctx_t *vfu_ctx, struct vfio_user_header *hdr, if (vfu_ctx->flags & LIBVFIO_USER_FLAG_ATTACH_NB) { sock_flags = MSG_DONTWAIT | MSG_WAITALL; } - ret = recvmsg(vfu_ctx->conn_fd, &msg, sock_flags); - if (ret == -1) { - return -errno; - } - - if (msg.msg_flags & MSG_CTRUNC || msg.msg_flags & MSG_TRUNC) { - return -EFAULT; - } - - for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; cmsg = CMSG_NXTHDR(&msg, cmsg)) { - if (cmsg->cmsg_level != SOL_SOCKET || cmsg->cmsg_type != SCM_RIGHTS) { - continue; - } - if (cmsg->cmsg_len < CMSG_LEN(sizeof(int))) { - return -EINVAL; - } - int size = cmsg->cmsg_len - CMSG_LEN(0); - if (size % sizeof(int) != 0) { - return -EINVAL; - } - *nr_fds = (int)(size / sizeof(int)); - memcpy(fds, CMSG_DATA(cmsg), *nr_fds * sizeof(int)); - break; - } - - return ret; + return get_msg(hdr, sizeof *hdr, fds, nr_fds, vfu_ctx->conn_fd, 
sock_flags); } struct transport_ops sock_transport_ops = { diff --git a/lib/tran_sock.h b/lib/tran_sock.h index b764e9b..c4ed30c 100644 --- a/lib/tran_sock.h +++ b/lib/tran_sock.h @@ -92,6 +92,15 @@ vfu_recv(int sock, struct vfio_user_header *hdr, bool is_reply, uint16_t *msg_id, void *data, size_t *len); /* + * Same as vfu_recv except it receives passed file descriptors. See vfu_msg_iovec for + * the semantics of @fds and @nr_fds. + */ +int +vfu_recv_fds(int sock, struct vfio_user_header *hdr, bool is_reply, + uint16_t *msg_id, void *data, size_t *len, int *fds, + size_t *nr_fds); + +/* * Receive a message from the other end, but automatically allocate a buffer for * it, which must be freed by the caller. If there is no data, *datap is set to * NULL. @@ -105,16 +114,25 @@ vfu_recv_alloc(int sock, struct vfio_user_header *hdr, bool is_reply, * iovecs array should leave the first entry empty, as it will be used for the * header. * - * If specified, the given fds are sent to the other side. @hdr is filled with - * the reply header if non-NULL. + * If specified, the given @send_fds are sent to the other side. @hdr is filled + * with the reply header if non-NULL. + * + * @recv_fds and @recv_fd_count are used to receive file descriptors. + * If @recv_fd_count is NULL then @recv_fds is ignored and no file descriptors + * are received. If @recv_fd_count is non-NULL then it contains the number of + * file descriptors that can be stored in @recv_fds, in which case @recv_fds + * must point to sufficient memory. On return, @recv_fd_count contains the + * number of file descriptors actually received, which does not exceed the + * original value of @recv_fd_count. 
*/ int vfu_msg_iovec(int sock, uint16_t msg_id, enum vfio_user_command cmd, struct iovec *iovecs, size_t nr_iovecs, - int *send_fds, size_t fd_count, + int *send_fds, size_t send_fd_count, struct vfio_user_header *hdr, - void *recv_data, size_t recv_len); + void *recv_data, size_t recv_len, + int *recv_fds, size_t *recv_fd_count); /* * Send and receive a message to the other end. @hdr is filled with the reply @@ -127,6 +145,19 @@ vfu_msg(int sock, uint16_t msg_id, struct vfio_user_header *hdr, void *recv_data, size_t recv_len); +/* + * Same as vfu_msg except that file descriptors can be received, see + * vfu_msg_iovec for the semantics of @recv_fds and @recv_fd_count. + */ +int +vfu_msg_fds(int sock, uint16_t msg_id, + enum vfio_user_command cmd, + void *send_data, size_t send_len, + struct vfio_user_header *hdr, + void *recv_data, size_t recv_len, + int *recv_fds, size_t *recv_fd_count); + + #endif /* LIB_VFIO_USER_TRAN_SOCK_H */ /* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/samples/client.c b/samples/client.c index 3e3ba54..576815d 100644 --- a/samples/client.c +++ b/samples/client.c @@ -207,10 +207,10 @@ send_device_reset(int sock) /* returns whether a VFIO migration capability is found */ static bool -get_region_vfio_caps(struct vfio_info_cap_header *header) +get_region_vfio_caps(struct vfio_info_cap_header *header, + struct vfio_region_info_cap_sparse_mmap *sparse) { struct vfio_region_info_cap_type *type; - struct vfio_region_info_cap_sparse_mmap *sparse; unsigned int i; bool migr = false; @@ -247,13 +247,12 @@ get_region_vfio_caps(struct vfio_info_cap_header *header) } static void -do_get_device_region_info(int sock, struct vfio_region_info *region_info) +do_get_device_region_info(int sock, struct vfio_region_info *region_info, + int *fds, size_t *nr_fds) { - int ret = vfu_msg(sock, 0, - VFIO_USER_DEVICE_GET_REGION_INFO, - region_info, region_info->argsz, - NULL, - region_info, region_info->argsz); + int ret = vfu_msg_fds(sock, 
0, VFIO_USER_DEVICE_GET_REGION_INFO, + region_info, region_info->argsz, NULL, + region_info, region_info->argsz, fds, nr_fds); if (ret < 0) { errx(EXIT_FAILURE, "failed to get device region info: %s", strerror(-ret)); @@ -266,19 +265,24 @@ get_device_region_info(int sock, uint32_t index) struct vfio_region_info *region_info; size_t cap_sz; size_t size = sizeof(struct vfio_region_info); + size_t nr_fds = CLIENT_MAX_FDS; + int fds[nr_fds]; region_info = alloca(size); region_info->argsz = size; region_info->index = index; - do_get_device_region_info(sock, region_info); + do_get_device_region_info(sock, region_info, NULL, 0); if (region_info->argsz > size) { size = region_info->size; region_info = alloca(size); region_info->argsz = size; region_info->index = index; - do_get_device_region_info(sock, region_info); + do_get_device_region_info(sock, region_info, fds, &nr_fds); assert(region_info->size == size); + assert(nr_fds == 2); + assert(fds[0] >= 0); + assert(fds[1] >= 0); } cap_sz = region_info->argsz - sizeof(struct vfio_region_info); @@ -286,7 +290,25 @@ get_device_region_info(int sock, uint32_t index) "cap_sz %lu\n", __func__, index, region_info->offset, region_info->flags, region_info->size, cap_sz); if (cap_sz) { - if (get_region_vfio_caps((struct vfio_info_cap_header*)(region_info + 1))) { + struct vfio_region_info_cap_sparse_mmap *sparse = NULL; + if (get_region_vfio_caps((struct vfio_info_cap_header*)(region_info + 1), + sparse)) { + if (sparse != NULL) { + size_t i; + assert(nr_fds == 2); + assert(sparse->nr_areas == 2); + for (i = 0; i < sparse->nr_areas; i++) { + void *addr = mmap(NULL, sparse->areas[i].size, + PROT_READ | PROT_WRITE, MAP_SHARED, + fds[i], sparse->areas[i].offset); + if (addr == MAP_FAILED) { + err(EXIT_FAILURE, + "failed to mmap sparse region %lu (%#llx-%#llx)", + i, sparse->areas[i].offset, + sparse->areas[i].offset + sparse->areas[i].size - 1); + } + } + } return true; } } @@ -386,7 +408,7 @@ configure_irqs(int sock) ret = 
vfu_msg_iovec(sock, msg_id, VFIO_USER_DEVICE_SET_IRQS, iovecs, ARRAY_SIZE(iovecs), &irq_fd, 1, - NULL, NULL, 0); + NULL, NULL, 0, NULL, 0); if (ret < 0) { errx(EXIT_FAILURE, "failed to send configure IRQs message: %s", @@ -435,7 +457,7 @@ access_region(int sock, int region, bool is_write, uint64_t offset, ret = vfu_msg_iovec(sock, 0, op, send_iovecs, nr_send_iovecs, NULL, 0, NULL, - &recv_data, recv_data_len); + &recv_data, recv_data_len, NULL, 0); if (ret != 0) { warnx("failed to %s region %d %#lx-%#lx: %s", is_write ? "write to" : "read from", region, offset, @@ -475,8 +497,7 @@ wait_for_irqs(int sock, int irq_fd) printf("INTx triggered!\n"); size = sizeof(vfio_user_irq_info); - ret = vfu_recv(sock, &hdr, false, &msg_id, - &vfio_user_irq_info, &size); + ret = vfu_recv(sock, &hdr, false, &msg_id, &vfio_user_irq_info, &size); if (ret < 0) { errx(EXIT_FAILURE, "failed to receive IRQ message: %s", strerror(-ret)); @@ -664,7 +685,7 @@ get_dirty_bitmaps(int sock, struct vfio_user_dma_region *dma_regions, ret = vfu_msg_iovec(sock, 0, VFIO_USER_DIRTY_PAGES, iovecs, ARRAY_SIZE(iovecs), NULL, 0, - &hdr, data, ARRAY_SIZE(data)); + &hdr, data, ARRAY_SIZE(data), NULL, 0); if (ret != 0) { errx(EXIT_FAILURE, "failed to start dirty page logging: %s", strerror(-ret)); @@ -903,7 +924,7 @@ map_dma_regions(int sock, int max_fds, struct vfio_user_dma_region *dma_regions, ret = vfu_msg_iovec(sock, i, VFIO_USER_DMA_MAP, iovecs, ARRAY_SIZE(iovecs), dma_region_fds + (i * max_fds), max_fds, - NULL, NULL, 0); + NULL, NULL, 0, NULL, 0); if (ret < 0) { errx(EXIT_FAILURE, "failed to map DMA regions: %s", strerror(-ret)); } diff --git a/samples/gpio-pci-idio-16.c b/samples/gpio-pci-idio-16.c index da7c4c0..e31db6d 100644 --- a/samples/gpio-pci-idio-16.c +++ b/samples/gpio-pci-idio-16.c @@ -122,7 +122,7 @@ main(int argc, char *argv[]) } ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR2_REGION_IDX, 0x100, - &bar2_access, VFU_REGION_FLAG_RW, NULL, 0, NULL); + &bar2_access, VFU_REGION_FLAG_RW, 
NULL, 0, -1); if (ret < 0) { fprintf(stderr, "failed to setup region\n"); goto out; diff --git a/samples/server.c b/samples/server.c index a1bedac..7860f86 100644 --- a/samples/server.c +++ b/samples/server.c @@ -232,14 +232,6 @@ static void do_dma_io(vfu_ctx_t *vfu_ctx, struct server_data *server_data) } } -unsigned long map_area(UNUSED void *pvt, UNUSED unsigned long off, - UNUSED unsigned long len) -{ - assert(false); - - return 0; -} - static int device_reset(UNUSED void *pvt) { printf("device reset callback\n"); @@ -386,6 +378,7 @@ int main(int argc, char *argv[]) vfu_pci_hdr_id_t id = {.raw = 0xdeadbeef}; vfu_pci_hdr_ss_t ss = {.raw = 0xcafebabe}; vfu_pci_hdr_cc_t cc = {.pi = 0xab, .scc = 0xcd, .bcc = 0xef}; + FILE *fp; while ((opt = getopt(argc, argv, "v")) != -1) { switch (opt) { @@ -431,7 +424,7 @@ int main(int argc, char *argv[]) } ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX, sizeof(time_t), - &bar0_access, VFU_REGION_FLAG_RW, NULL, 0, NULL); + &bar0_access, VFU_REGION_FLAG_RW, NULL, 0, -1); if (ret < 0) { err(EXIT_FAILURE, "failed to setup BAR0 region"); } @@ -440,13 +433,19 @@ int main(int argc, char *argv[]) * Setup BAR1 to be 3 pages in size where only the first and the last pages * are mappable. 
*/ + if ((fp = tmpfile()) == NULL) { + err(EXIT_FAILURE, "failed to create BAR1 file"); + } + if (ftruncate(fileno(fp), 0x3000) == -1) { + err(EXIT_FAILURE, "failed to truncate BAR1 file"); + } struct iovec mmap_areas[] = { { .iov_base = (void*)0, .iov_len = 0x1000 }, { .iov_base = (void*)0x2000, .iov_len = 0x1000 } }; ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR1_REGION_IDX, 0x3000, &bar1_access, VFU_REGION_FLAG_RW, - mmap_areas, 3, map_area); + mmap_areas, 2, fileno(fp)); if (ret < 0) { err(EXIT_FAILURE, "failed to setup BAR1 region"); } diff --git a/test/mocks.c b/test/mocks.c index 204ae75..faa8ed4 100644 --- a/test/mocks.c +++ b/test/mocks.c @@ -100,8 +100,8 @@ __wrap_get_next_command(vfu_ctx_t *vfu_ctx, struct vfio_user_header *hdr, int __wrap_exec_command(vfu_ctx_t *vfu_ctx, struct vfio_user_header *hdr, - size_t size, int *fds, size_t *nr_fds, - struct iovec *_iovecs, struct iovec **iovecs, + size_t size, int *fds, size_t *nr_fds, size_t **fds_out, + int *nr_fds_out, struct iovec *_iovecs, struct iovec **iovecs, size_t *nr_iovecs, bool *free_iovec_data) { check_expected(vfu_ctx); @@ -109,6 +109,8 @@ __wrap_exec_command(vfu_ctx_t *vfu_ctx, struct vfio_user_header *hdr, check_expected(size); check_expected(fds); check_expected(nr_fds); + check_expected(fds_out); + check_expected(nr_fds_out); check_expected(_iovecs); check_expected(iovecs); check_expected(nr_iovecs); @@ -144,7 +146,10 @@ __wrap_vfu_send_iovec(int sock, uint16_t msg_id, bool is_reply, void __wrap_free(void *ptr) { - assert(false); + if (!is_patched(free)) { + __real_free(ptr); + return; + } check_expected(ptr); } diff --git a/test/unit-tests.c b/test/unit-tests.c index c3946b0..4d28db7 100644 --- a/test/unit-tests.c +++ b/test/unit-tests.c @@ -302,6 +302,8 @@ test_process_command_free_passed_fds(void **state __attribute__((unused))) expect_value(__wrap_exec_command, size, 0x0000beef); expect_check(__wrap_exec_command, fds, &set_fds, &exec_command); expect_any(__wrap_exec_command, nr_fds); + 
expect_any(__wrap_exec_command, fds_out); + expect_any(__wrap_exec_command, nr_fds_out); expect_any(__wrap_exec_command, _iovecs); expect_any(__wrap_exec_command, iovecs); expect_any(__wrap_exec_command, nr_iovecs); @@ -405,10 +407,12 @@ test_get_region_info(UNUSED void **state) }, { .flags = VFU_REGION_FLAG_RW, - .size = 0xdeadbeef + .size = 0xdeadbeef, + .fd = 0x12345 } }; vfu_ctx_t vfu_ctx = { + .client_max_fds = 1, .nr_regions = 2, .reg_info = reg_info }; @@ -416,23 +420,32 @@ test_get_region_info(UNUSED void **state) uint32_t argsz = 0; struct vfio_region_info *vfio_reg; struct vfu_sparse_mmap_areas *mmap_areas = alloca(sizeof(struct vfu_sparse_mmap_areas) + sizeof(struct iovec)); - + int *fds = NULL; + size_t nr_fds; + /* bad argsz */ - assert_int_equal(-EINVAL, dev_get_reginfo(&vfu_ctx, index, argsz, &vfio_reg)); + assert_int_equal(-EINVAL, + dev_get_reginfo(&vfu_ctx, index, argsz, &vfio_reg, + &fds, &nr_fds)); /* bad region */ index = vfu_ctx.nr_regions; argsz = sizeof(struct vfio_region_info); - assert_int_equal(-EINVAL, dev_get_reginfo(&vfu_ctx, index, argsz, &vfio_reg)); + assert_int_equal(-EINVAL, + dev_get_reginfo(&vfu_ctx, index, argsz, &vfio_reg, + &fds, &nr_fds)); /* no region caps */ index = 1; - assert_int_equal(0, dev_get_reginfo(&vfu_ctx, index, argsz, &vfio_reg)); + assert_int_equal(0, + dev_get_reginfo(&vfu_ctx, index, argsz, &vfio_reg, + &fds, &nr_fds)); assert_int_equal(sizeof(struct vfio_region_info), vfio_reg->argsz); assert_int_equal(VFU_REGION_FLAG_RW, vfio_reg->flags); assert_int_equal(1, vfio_reg->index); assert_int_equal(0x10000000000, region_to_offset(vfio_reg->index)); assert_int_equal(0xdeadbeef, vfio_reg->size); + assert_int_equal(0, nr_fds); /* regions caps (sparse mmap) but argsz too small */ mmap_areas->nr_mmap_areas = 1; @@ -440,20 +453,28 @@ test_get_region_info(UNUSED void **state) mmap_areas->areas[0].iov_len = 0x0d15ea5e; vfu_ctx.reg_info[1].mmap_areas = mmap_areas; vfu_ctx.reg_info[1].flags |= VFIO_REGION_INFO_FLAG_MMAP; 
- assert_int_equal(0, dev_get_reginfo(&vfu_ctx, index, argsz, &vfio_reg)); + assert_int_equal(0, + dev_get_reginfo(&vfu_ctx, index, argsz, &vfio_reg, + &fds, &nr_fds)); assert_int_equal(argsz + sizeof(struct vfio_region_info_cap_sparse_mmap) + sizeof(struct vfio_region_sparse_mmap_area), vfio_reg->argsz); assert_int_equal(VFU_REGION_FLAG_RW | VFIO_REGION_INFO_FLAG_MMAP | VFIO_REGION_INFO_FLAG_CAPS, vfio_reg->flags); + assert_int_equal(0, nr_fds); /* region caps and argsz large enough */ argsz += sizeof(struct vfio_region_info_cap_sparse_mmap) + sizeof(struct vfio_region_sparse_mmap_area); - assert_int_equal(0, dev_get_reginfo(&vfu_ctx, index, argsz, &vfio_reg)); + assert_int_equal(0, + dev_get_reginfo(&vfu_ctx, index, argsz, &vfio_reg, + &fds, &nr_fds)); struct vfio_region_info_cap_sparse_mmap *sparse = (struct vfio_region_info_cap_sparse_mmap*)(vfio_reg + 1); assert_int_equal(VFIO_REGION_INFO_CAP_SPARSE_MMAP, sparse->header.id); assert_int_equal(1, sparse->header.version); assert_int_equal(0, sparse->header.next); assert_int_equal(1, sparse->nr_areas); + assert_non_null(fds); + assert_int_equal(1, nr_fds); + assert_int_equal(0x12345, fds[0]); /* FIXME add check for migration region and for multiple sparse areas */ } @@ -559,6 +580,45 @@ test_device_get_info(void **state __attribute__((unused))) } /* + * Performs various checks when adding sparse memory regions. 
+ */ +static void +test_setup_sparse_region(void **state __attribute__((unused))) +{ + vfu_reg_info_t reg_info; + vfu_ctx_t vfu_ctx = { .reg_info = ®_info }; + struct iovec mmap_areas[2] = { + [0] = { + .iov_base = (void*)0x0, + .iov_len = 0x1000 + }, + [1] = { + .iov_base = (void*)0x1000, + .iov_len = 0x1000 + } + }; + + /* bad fd */ + assert_int_equal(-1, + vfu_setup_region(&vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX, + 0x2000, NULL, 0, mmap_areas, 2, -1)); + assert_int_equal(EBADF, errno); + + /* sparse region exceeds region size */ + mmap_areas[1].iov_len = 0x1001; + assert_int_equal(-1, + vfu_setup_region(&vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX, + 0x2000, NULL, 0, mmap_areas, 2, 0)); + assert_int_equal(EINVAL, errno); + + /* sparse region within region size */ + mmap_areas[1].iov_len = 0x1000; + assert_int_equal(0, + vfu_setup_region(&vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX, + 0x2000, NULL, 0, mmap_areas, 2, 0)); +} + +/* * FIXME we shouldn't have to specify a setup function explicitly for each unit * test, cmocka should provide that. E.g. cmocka_run_group_tests enables us to * run a function before/after ALL unit tests have finished, we can extend it @@ -587,7 +647,8 @@ int main(void) cmocka_unit_test_setup(test_vfu_ctx_create, setup), cmocka_unit_test_setup(test_pci_caps, setup), cmocka_unit_test_setup(test_device_get_info, setup), - cmocka_unit_test_setup(test_get_region_info, setup) + cmocka_unit_test_setup(test_get_region_info, setup), + cmocka_unit_test_setup(test_setup_sparse_region, setup) }; return cmocka_run_group_tests(tests, NULL, NULL); |