aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/libvfio-user.h59
-rw-r--r--lib/libvfio-user.c83
-rw-r--r--lib/private.h11
-rw-r--r--lib/tran_sock.c129
-rw-r--r--lib/tran_sock.h39
-rw-r--r--samples/client.c55
-rw-r--r--samples/gpio-pci-idio-16.c2
-rw-r--r--samples/server.c19
-rw-r--r--test/mocks.c11
-rw-r--r--test/unit-tests.c77
10 files changed, 340 insertions, 145 deletions
diff --git a/include/libvfio-user.h b/include/libvfio-user.h
index 342f3e5..d5a8c6a 100644
--- a/include/libvfio-user.h
+++ b/include/libvfio-user.h
@@ -188,28 +188,6 @@ void *
vfu_mmap(vfu_ctx_t * vfu_ctx, off_t offset, size_t length);
/**
- * Prototype for memory access callback. The program MUST first map device
- * memory in its own virtual address space using vfu_mmap, do any additional work
- * required, and finally return that memory. When a region is memory mapped,
- * libvfio-user calls the previously registered callback with the following
- * arguments:
- *
- * @pvt: private pointer
- * @off: offset of memory area being memory mapped
- * @len: length of memory area being memory mapped
- *
- * @returns the memory address returned by vfu_mmap, or MAP_FAILED on failure
- */
-typedef unsigned long (vfu_map_region_cb_t) (void *pvt, unsigned long off,
- unsigned long len);
-
-#define VFU_REGION_FLAG_READ (1 << 0)
-#define VFU_REGION_FLAG_WRITE (1 << 1)
-#define VFU_REGION_FLAG_MMAP (1 << 2) // TODO: how this relates to IO bar?
-#define VFU_REGION_FLAG_RW (VFU_REGION_FLAG_READ | VFU_REGION_FLAG_WRITE)
-#define VFU_REGION_FLAG_MEM (1 << 3) // if unset, bar is IO
-
-/**
* Prototype for region access callback. When a region is accessed, libvfio-user
* calls the previously registered callback with the following arguments:
*
@@ -224,6 +202,12 @@ typedef unsigned long (vfu_map_region_cb_t) (void *pvt, unsigned long off,
typedef ssize_t (vfu_region_access_cb_t) (void *pvt, char *buf, size_t count,
loff_t offset, bool is_write);
+#define VFU_REGION_FLAG_READ (1 << 0)
+#define VFU_REGION_FLAG_WRITE (1 << 1)
+#define VFU_REGION_FLAG_MMAP (1 << 2) // TODO: how this relates to IO bar?
+#define VFU_REGION_FLAG_RW (VFU_REGION_FLAG_READ | VFU_REGION_FLAG_WRITE)
+#define VFU_REGION_FLAG_MEM (1 << 3) // if unset, bar is IO
+
/**
* Set up a region.
*
@@ -236,11 +220,30 @@ typedef ssize_t (vfu_region_access_cb_t) (void *pvt, char *buf, size_t count,
* @vfu_ctx: the libvfio-user context
* @region_idx: region index
* @size: size of the region
- * @region_access: callback function to access region
- * @flags: region flags
- * @mmap_areas: array of memory mappable areas
- * @nr_mmap_areas: size of mmap_areas
- * @map: callback function to map region
+ * @region_access: callback function to access region. If the region is memory
+ * mappable and the client accesses the region or part of sparse area, then
+ * the callback is not called.
+ * @flags: region flags (VFU_REGION_FLAG_)
+ * @mmap_areas: array of memory mappable areas. This array provides to the
+ * server greater control of which specific areas should be memory mapped by
+ * the client. Each element in the @mmap_areas array describes one such area.
+ * Ignored if @nr_mmap_areas is 0 or if the region is not memory mappable.
+ * @nr_mmap_areas: number of sparse areas in @mmap_areas. Must be 0 if the
+ * region is not memory mappable.
+ * @fd: file descriptor of the file backing the region if it's a mappable
+ * region. It is the server's responsibility to create a file suitable for
+ * memory mapping by the client. Ignored if the region is not memory mappable.
+ *
+ * A note on memory-mappable regions: the client can memory map any part of the
+ * file descriptor, even if not supposed to do so according to @mmap_areas.
+ * There is no way in Linux to avoid this.
+ *
+ * TODO maybe we should introduce per-sparse region file descriptors so that
+ * the client cannot possibly memory map areas it's not supposed to. Even if
+ * the client needs to have region under the same backing file, it is possible
+ * to create linear device-mapper targets, one for each area, and provide file
+ * descriptors of these DM targets. This is something we can document and
+ * demonstrate in a sample.
*
* @returns 0 on success, -1 on error, Sets errno.
*/
@@ -248,7 +251,7 @@ int
vfu_setup_region(vfu_ctx_t *vfu_ctx, int region_idx, size_t size,
vfu_region_access_cb_t *region_access, int flags,
struct iovec *mmap_areas, uint32_t nr_mmap_areas,
- vfu_map_region_cb_t *map);
+ int fd);
/*
* Callback function that is called when the guest resets the device.
diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c
index fab68d5..846a70f 100644
--- a/lib/libvfio-user.c
+++ b/lib/libvfio-user.c
@@ -107,9 +107,9 @@ get_vfio_caps_size(bool is_migr_reg, struct vfu_sparse_mmap_areas *m)
* Sparse mmap information stays after struct vfio_region_info and cap_offest
* points accordingly.
*/
-static void
+static int
dev_get_caps(vfu_ctx_t *vfu_ctx, vfu_reg_info_t *vfu_reg, bool is_migr_reg,
- struct vfio_region_info *vfio_reg)
+ struct vfio_region_info *vfio_reg, int **fds, size_t *nr_fds)
{
struct vfio_info_cap_header *header;
struct vfio_region_info_cap_type *type = NULL;
@@ -118,6 +118,8 @@ dev_get_caps(vfu_ctx_t *vfu_ctx, vfu_reg_info_t *vfu_reg, bool is_migr_reg,
assert(vfu_ctx != NULL);
assert(vfio_reg != NULL);
+ assert(fds != NULL);
+ assert(nr_fds != NULL);
header = (struct vfio_info_cap_header*)(vfio_reg + 1);
@@ -140,13 +142,25 @@ dev_get_caps(vfu_ctx_t *vfu_ctx, vfu_reg_info_t *vfu_reg, bool is_migr_reg,
vfio_reg->cap_offset = sizeof(struct vfio_region_info);
sparse = (struct vfio_region_info_cap_sparse_mmap*)header;
}
+
+ /*
+ * FIXME need to figure out how to break message into smaller messages
+ * so that we don't exceed client_max_fds
+ */
+ assert(nr_mmap_areas <= vfu_ctx->client_max_fds);
+
+ *fds = malloc(nr_mmap_areas * sizeof(int));
+ if (*fds == NULL) {
+ return -ENOMEM;
+ }
sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
sparse->header.version = 1;
sparse->header.next = 0;
- sparse->nr_areas = nr_mmap_areas;
+ sparse->nr_areas = *nr_fds = nr_mmap_areas;
mmap_areas = vfu_reg->mmap_areas;
for (i = 0; i < nr_mmap_areas; i++) {
+ (*fds)[i] = vfu_reg->fd;
sparse->areas[i].offset = (__u64)mmap_areas->areas[i].iov_base;
sparse->areas[i].size = mmap_areas->areas[i].iov_len;
vfu_log(vfu_ctx, LOG_DEBUG, "%s: area %d %#llx-%#llx", __func__,
@@ -154,6 +168,7 @@ dev_get_caps(vfu_ctx_t *vfu_ctx, vfu_reg_info_t *vfu_reg, bool is_migr_reg,
sparse->areas[i].offset + sparse->areas[i].size);
}
}
+ return 0;
}
#define VFU_REGION_SHIFT 40
@@ -207,7 +222,7 @@ is_migr_reg(vfu_ctx_t *vfu_ctx, int index)
long
dev_get_reginfo(vfu_ctx_t *vfu_ctx, uint32_t index, uint32_t argsz,
- struct vfio_region_info **vfio_reg)
+ struct vfio_region_info **vfio_reg, int **fds, size_t *nr_fds)
{
vfu_reg_info_t *vfu_reg;
size_t caps_size;
@@ -243,12 +258,14 @@ dev_get_reginfo(vfu_ctx_t *vfu_ctx, uint32_t index, uint32_t argsz,
(*vfio_reg)->offset = region_to_offset((*vfio_reg)->index);
(*vfio_reg)->size = vfu_reg->size;
+ *nr_fds = 0;
if (caps_size > 0) {
if (vfu_reg->mmap_areas != NULL) {
(*vfio_reg)->flags |= VFIO_REGION_INFO_FLAG_CAPS;
}
if (argsz >= (*vfio_reg)->argsz) {
- dev_get_caps(vfu_ctx, vfu_reg, is_migr_reg(vfu_ctx, index), *vfio_reg);
+ dev_get_caps(vfu_ctx, vfu_reg, is_migr_reg(vfu_ctx, index),
+ *vfio_reg, fds, nr_fds);
}
}
@@ -482,14 +499,15 @@ vfu_access(vfu_ctx_t *vfu_ctx, bool is_write, char *rwbuf, uint32_t count,
+/*
+ * Handles VFIO_USER_DEVICE_GET_REGION_INFO: a payload smaller than
+ * struct vfio_region_info is rejected with -EINVAL, otherwise the requested
+ * index and argsz are forwarded to dev_get_reginfo() and its result is
+ * returned. @fds and @nr_fds are passed through unchanged so that
+ * dev_get_reginfo() can report the file descriptors to send with the reply.
+ */
static int
handle_device_get_region_info(vfu_ctx_t *vfu_ctx, uint32_t size,
struct vfio_region_info *reg_info_in,
- struct vfio_region_info **reg_info_out)
+ struct vfio_region_info **reg_info_out,
+ int **fds, size_t *nr_fds)
{
+    /* The request must carry at least the fixed-size part of the struct. */
if (size < sizeof(*reg_info_in)) {
return -EINVAL;
}
return dev_get_reginfo(vfu_ctx, reg_info_in->index, reg_info_in->argsz,
- reg_info_out);
+ reg_info_out, fds, nr_fds);
}
int
@@ -879,7 +897,7 @@ UNIT_TEST_SYMBOL(get_next_command);
int
exec_command(vfu_ctx_t *vfu_ctx, struct vfio_user_header *hdr, size_t size,
- int *fds, size_t nr_fds,
+ int *fds, size_t nr_fds, int **fds_out, size_t *nr_fds_out,
struct iovec *_iovecs, struct iovec **iovecs, size_t *nr_iovecs,
bool *free_iovec_data)
{
@@ -958,7 +976,8 @@ exec_command(vfu_ctx_t *vfu_ctx, struct vfio_user_header *hdr, size_t size,
break;
case VFIO_USER_DEVICE_GET_REGION_INFO:
ret = handle_device_get_region_info(vfu_ctx, hdr->msg_size, cmd_data,
- &dev_reg_info);
+ &dev_reg_info, fds_out,
+ nr_fds_out);
if (ret == 0) {
_iovecs[1].iov_base = dev_reg_info;
_iovecs[1].iov_len = hdr->msg_size;
@@ -1024,8 +1043,9 @@ process_request(vfu_ctx_t *vfu_ctx)
{
struct vfio_user_header hdr = { 0, };
int ret;
- int *fds = NULL;
+ int *fds = NULL, *fds_out = NULL;
size_t nr_fds, i;
+ size_t nr_fds_out = 0;
struct iovec _iovecs[2] = { { 0, } };
struct iovec *iovecs = NULL;
size_t nr_iovecs = 0;
@@ -1054,8 +1074,8 @@ process_request(vfu_ctx_t *vfu_ctx)
return ret;
}
- ret = exec_command(vfu_ctx, &hdr, ret, fds, nr_fds, _iovecs, &iovecs,
- &nr_iovecs, &free_iovec_data);
+ ret = exec_command(vfu_ctx, &hdr, ret, fds, nr_fds, &fds_out, &nr_fds_out,
+ _iovecs, &iovecs, &nr_iovecs, &free_iovec_data);
for (i = 0; i < nr_fds; i++) {
if (fds[i] != -1) {
@@ -1081,7 +1101,7 @@ process_request(vfu_ctx_t *vfu_ctx)
if (!(hdr.flags.no_reply)) {
// FIXME: SPEC: should the reply include the command? I'd say yes?
ret = vfu_send_iovec(vfu_ctx->conn_fd, hdr.msg_id, true,
- 0, iovecs, nr_iovecs, NULL, 0, -ret);
+ 0, iovecs, nr_iovecs, fds_out, nr_fds_out, -ret);
if (unlikely(ret < 0)) {
vfu_log(vfu_ctx, LOG_ERR, "failed to complete command: %s",
strerror(-ret));
@@ -1465,11 +1485,37 @@ copy_sparse_mmap_areas(vfu_reg_info_t *reg_info,
return 0;
}
+/*
+ * Records the backing file descriptor and the sparse mmap areas of a
+ * memory-mappable region.
+ *
+ * @r: region to set up; r->size must already hold the region size
+ * @mmap_areas: areas handed to copy_sparse_mmap_areas(); iov_base is the
+ *              offset of the area within the region, iov_len its length
+ * @nr_mmap_areas: number of elements in @mmap_areas
+ * @fd: file descriptor backing the region, must not be -1
+ *
+ * @returns 0 on success, -EBADF if @fd is -1, -EINVAL if an area does not fit
+ * within the region (the copied areas are released and r->mmap_areas is reset
+ * to NULL), or the error returned by copy_sparse_mmap_areas().
+ */
+static int
+setup_sparse_areas(vfu_reg_info_t *r, struct iovec *mmap_areas,
+                   uint32_t nr_mmap_areas, int fd)
+{
+    int ret, i;
+
+    assert(r != NULL);
+
+    if (fd == -1) {
+        return -EBADF;
+    }
+    r->fd = fd;
+    ret = copy_sparse_mmap_areas(r, mmap_areas, nr_mmap_areas);
+    if (ret < 0) {
+        return ret;
+    }
+    for (i = 0; i < r->mmap_areas->nr_mmap_areas; i++) {
+        struct iovec *a = &r->mmap_areas->areas[i];
+        uintptr_t start = (uintptr_t)a->iov_base;
+
+        /*
+         * Compare against the space left after @start instead of computing
+         * start + iov_len, which could wrap around and pass the check.
+         */
+        if (start > r->size || a->iov_len > r->size - start) {
+            free(r->mmap_areas);
+            /* Don't leave a dangling pointer in the region for later users. */
+            r->mmap_areas = NULL;
+            return -EINVAL;
+        }
+    }
+    return 0;
+}
+
int
vfu_setup_region(vfu_ctx_t *vfu_ctx, int region_idx, size_t size,
vfu_region_access_cb_t *region_access, int flags,
struct iovec *mmap_areas, uint32_t nr_mmap_areas,
- vfu_map_region_cb_t *map)
+ int fd)
{
int ret;
@@ -1487,12 +1533,9 @@ vfu_setup_region(vfu_ctx_t *vfu_ctx, int region_idx, size_t size,
vfu_ctx->reg_info[region_idx].size = size;
vfu_ctx->reg_info[region_idx].fn = region_access;
- if (map != NULL) {
- vfu_ctx->reg_info[region_idx].map = map;
- }
- if (mmap_areas) {
- ret = copy_sparse_mmap_areas(&vfu_ctx->reg_info[region_idx],
- mmap_areas, nr_mmap_areas);
+ if (nr_mmap_areas > 0) {
+ ret = setup_sparse_areas(&vfu_ctx->reg_info[region_idx], mmap_areas,
+ nr_mmap_areas, fd);
if (ret < 0) {
return ERROR(-ret);
}
diff --git a/lib/private.h b/lib/private.h
index 7860ec2..968271d 100644
--- a/lib/private.h
+++ b/lib/private.h
@@ -91,12 +91,8 @@ typedef struct {
*/
vfu_region_access_cb_t *fn;
- /*
- * Callback function that is called when the region is memory mapped.
- * Required if VFU_REGION_FLAG_MEM is set, otherwise ignored.
- */
- vfu_map_region_cb_t *map;
struct vfu_sparse_mmap_areas *mmap_areas; /* sparse mmap areas */
+ int fd;
} vfu_reg_info_t;
struct pci_dev {
@@ -158,7 +154,7 @@ get_next_command(vfu_ctx_t *vfu_ctx, struct vfio_user_header *hdr, int *fds,
int
exec_command(vfu_ctx_t *vfu_ctx, struct vfio_user_header *hdr, size_t size,
- int *fds, size_t nr_fds,
+ int *fds, size_t nr_fds, int **fds_out, size_t *nr_fds_out,
struct iovec *_iovecs, struct iovec **iovecs, size_t *nr_iovecs,
bool *free_iovec_data);
@@ -171,9 +167,10 @@ consume_fd(int *fds, size_t nr_fds, size_t index);
int
handle_device_get_info(vfu_ctx_t *vfu_ctx, uint32_t size,
struct vfio_device_info *dev_info);
+
long
dev_get_reginfo(vfu_ctx_t *vfu_ctx, uint32_t index, uint32_t argsz,
- struct vfio_region_info **vfio_reg);
+ struct vfio_region_info **vfio_reg, int **fds, size_t *nr_fds);
#endif /* LIB_VFIO_USER_PRIVATE_H */
diff --git a/lib/tran_sock.c b/lib/tran_sock.c
index f96784d..dd682db 100644
--- a/lib/tran_sock.c
+++ b/lib/tran_sock.c
@@ -214,6 +214,52 @@ vfu_send_error(int sock, uint16_t msg_id,
return vfu_send_iovec(sock, msg_id, true, cmd, NULL, 0, NULL, 0, error);
}
+/*
+ * Receives up to @len bytes from @sock_fd into @data with a single recvmsg(2)
+ * call and, when @nr_fds is non-NULL, collects the file descriptors passed
+ * alongside in an SCM_RIGHTS control message.
+ *
+ * On entry *@nr_fds is the capacity of @fds (which must then be non-NULL if
+ * the capacity is non-zero); on return it is the number of descriptors stored
+ * in @fds.
+ *
+ * @returns the number of bytes received, or a negative errno value: the
+ * recvmsg(2) error, -EFAULT if data or control data was truncated, -EINVAL if
+ * the SCM_RIGHTS payload is malformed. Failure is therefore "< 0", not
+ * "== -1".
+ */
+static int
+get_msg(void *data, size_t len, int *fds, size_t *nr_fds, int sock_fd,
+        int sock_flags)
+{
+    struct iovec payload = { .iov_base = data, .iov_len = len };
+    struct msghdr mh = { .msg_iov = &payload, .msg_iovlen = 1 };
+    struct cmsghdr *c;
+    int nread;
+
+    /* A control buffer is only needed when the caller has room for fds. */
+    if (nr_fds != NULL && *nr_fds > 0) {
+        assert(fds != NULL);
+        mh.msg_controllen = CMSG_SPACE(sizeof(int) * *nr_fds);
+        mh.msg_control = alloca(mh.msg_controllen);
+        *nr_fds = 0;
+    }
+
+    nread = recvmsg(sock_fd, &mh, sock_flags);
+    if (nread == -1) {
+        return -errno;
+    }
+
+    if ((mh.msg_flags & (MSG_CTRUNC | MSG_TRUNC)) != 0) {
+        return -EFAULT;
+    }
+
+    if (nr_fds == NULL) {
+        return nread;
+    }
+
+    /* Only the first SCM_RIGHTS control message is consumed. */
+    c = CMSG_FIRSTHDR(&mh);
+    while (c != NULL) {
+        if (c->cmsg_level == SOL_SOCKET && c->cmsg_type == SCM_RIGHTS) {
+            int nbytes;
+
+            if (c->cmsg_len < CMSG_LEN(sizeof(int))) {
+                return -EINVAL;
+            }
+            nbytes = c->cmsg_len - CMSG_LEN(0);
+            if (nbytes % sizeof(int) != 0) {
+                return -EINVAL;
+            }
+            *nr_fds = (int)(nbytes / sizeof(int));
+            memcpy(fds, CMSG_DATA(c), *nr_fds * sizeof(int));
+            break;
+        }
+        c = CMSG_NXTHDR(&mh, c);
+    }
+
+    return nread;
+}
+
/*
* Receive a vfio-user message. If "len" is set to non-zero, the message should
* include data of that length, which is stored in the pre-allocated "data"
@@ -224,14 +270,15 @@ vfu_send_error(int sock, uint16_t msg_id,
* better.
*/
int
-vfu_recv(int sock, struct vfio_user_header *hdr, bool is_reply,
- uint16_t *msg_id, void *data, size_t *len)
+vfu_recv_fds(int sock, struct vfio_user_header *hdr, bool is_reply,
+ uint16_t *msg_id, void *data, size_t *len, int *fds,
+ size_t *nr_fds)
{
int ret;
/* FIXME if ret == -1 then fcntl can overwrite recv's errno */
- ret = recv_blocking(sock, hdr, sizeof(*hdr), 0);
+ ret = get_msg(hdr, sizeof *hdr, fds, nr_fds, sock, 0);
if (ret == -1) {
return -errno;
}
@@ -275,6 +322,13 @@ vfu_recv(int sock, struct vfio_user_header *hdr, bool is_reply,
return 0;
}
+/*
+ * Receive a message without accepting file descriptors: forwards to
+ * vfu_recv_fds() with NULL @fds and @nr_fds.
+ */
+int
+vfu_recv(int sock, struct vfio_user_header *hdr, bool is_reply,
+         uint16_t *msg_id, void *data, size_t *len)
+{
+    int *no_fds = NULL;
+    size_t *no_fd_count = NULL;
+
+    return vfu_recv_fds(sock, hdr, is_reply, msg_id, data, len, no_fds,
+                        no_fd_count);
+}
+
/*
* Like vfu_recv(), but will automatically allocate reply data.
*
@@ -333,26 +387,29 @@ vfu_recv_alloc(int sock, struct vfio_user_header *hdr, bool is_reply,
int
vfu_msg_iovec(int sock, uint16_t msg_id, enum vfio_user_command cmd,
struct iovec *iovecs, size_t nr_iovecs,
- int *send_fds, size_t fd_count,
+ int *send_fds, size_t send_fd_count,
struct vfio_user_header *hdr,
- void *recv_data, size_t recv_len)
+ void *recv_data, size_t recv_len,
+ int *recv_fds, size_t *recv_fd_count)
{
+    /* Send the request along with @send_fds; a send error is returned as is. */
int ret = vfu_send_iovec(sock, msg_id, false, cmd, iovecs, nr_iovecs,
- send_fds, fd_count, 0);
+ send_fds, send_fd_count, 0);
if (ret < 0) {
return ret;
}
+    /* Caller doesn't want the reply header: receive it into stack scratch. */
if (hdr == NULL) {
hdr = alloca(sizeof *hdr);
}
+    /* Receive the reply; passed fds go to @recv_fds / @recv_fd_count. */
- return vfu_recv(sock, hdr, true, &msg_id, recv_data, &recv_len);
+ return vfu_recv_fds(sock, hdr, true, &msg_id, recv_data, &recv_len,
+ recv_fds, recv_fd_count);
}
int
-vfu_msg(int sock, uint16_t msg_id, enum vfio_user_command cmd,
- void *send_data, size_t send_len,
- struct vfio_user_header *hdr,
- void *recv_data, size_t recv_len)
+vfu_msg_fds(int sock, uint16_t msg_id, enum vfio_user_command cmd,
+ void *send_data, size_t send_len,
+ struct vfio_user_header *hdr,
+ void *recv_data, size_t recv_len, int *recv_fds,
+ size_t *recv_fd_count)
{
/* [0] is for the header. */
struct iovec iovecs[2] = {
@@ -362,7 +419,18 @@ vfu_msg(int sock, uint16_t msg_id, enum vfio_user_command cmd,
}
};
return vfu_msg_iovec(sock, msg_id, cmd, iovecs, ARRAY_SIZE(iovecs),
- NULL, 0, hdr, recv_data, recv_len);
+ NULL, 0, hdr, recv_data, recv_len, recv_fds,
+ recv_fd_count);
+}
+
+/*
+ * Send a message and receive the reply without accepting file descriptors:
+ * forwards to vfu_msg_fds() with NULL @recv_fds and @recv_fd_count.
+ */
+int
+vfu_msg(int sock, uint16_t msg_id, enum vfio_user_command cmd,
+        void *send_data, size_t send_len,
+        struct vfio_user_header *hdr,
+        void *recv_data, size_t recv_len)
+{
+    int *no_fds = NULL;
+    size_t *no_fd_count = NULL;
+
+    return vfu_msg_fds(sock, msg_id, cmd, send_data, send_len, hdr, recv_data,
+                       recv_len, no_fds, no_fd_count);
}
/*
@@ -650,15 +718,7 @@ static int
get_request_sock(vfu_ctx_t *vfu_ctx, struct vfio_user_header *hdr,
int *fds, size_t *nr_fds)
{
- int ret, sock_flags = 0;
- struct iovec iov = {.iov_base = hdr, .iov_len = sizeof *hdr};
- struct msghdr msg = {.msg_iov = &iov, .msg_iovlen = 1};
- struct cmsghdr *cmsg;
-
- msg.msg_controllen = CMSG_SPACE(sizeof(int) * *nr_fds);
- msg.msg_control = alloca(msg.msg_controllen);
-
- *nr_fds = 0;
+ int sock_flags = 0;
/*
* TODO ideally we should set O_NONBLOCK on the fd so that the syscall is
@@ -668,32 +728,7 @@ get_request_sock(vfu_ctx_t *vfu_ctx, struct vfio_user_header *hdr,
if (vfu_ctx->flags & LIBVFIO_USER_FLAG_ATTACH_NB) {
sock_flags = MSG_DONTWAIT | MSG_WAITALL;
}
- ret = recvmsg(vfu_ctx->conn_fd, &msg, sock_flags);
- if (ret == -1) {
- return -errno;
- }
-
- if (msg.msg_flags & MSG_CTRUNC || msg.msg_flags & MSG_TRUNC) {
- return -EFAULT;
- }
-
- for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
- if (cmsg->cmsg_level != SOL_SOCKET || cmsg->cmsg_type != SCM_RIGHTS) {
- continue;
- }
- if (cmsg->cmsg_len < CMSG_LEN(sizeof(int))) {
- return -EINVAL;
- }
- int size = cmsg->cmsg_len - CMSG_LEN(0);
- if (size % sizeof(int) != 0) {
- return -EINVAL;
- }
- *nr_fds = (int)(size / sizeof(int));
- memcpy(fds, CMSG_DATA(cmsg), *nr_fds * sizeof(int));
- break;
- }
-
- return ret;
+ return get_msg(hdr, sizeof *hdr, fds, nr_fds, vfu_ctx->conn_fd, sock_flags);
}
struct transport_ops sock_transport_ops = {
diff --git a/lib/tran_sock.h b/lib/tran_sock.h
index b764e9b..c4ed30c 100644
--- a/lib/tran_sock.h
+++ b/lib/tran_sock.h
@@ -92,6 +92,15 @@ vfu_recv(int sock, struct vfio_user_header *hdr, bool is_reply,
uint16_t *msg_id, void *data, size_t *len);
/*
+ * Same as vfu_recv except it receives passed file descriptors. See
+ * vfu_msg_iovec for the semantics of @fds and @nr_fds (documented there as
+ * @recv_fds and @recv_fd_count).
+ */
+int
+vfu_recv_fds(int sock, struct vfio_user_header *hdr, bool is_reply,
+ uint16_t *msg_id, void *data, size_t *len, int *fds,
+ size_t *nr_fds);
+
+/*
* Receive a message from the other end, but automatically allocate a buffer for
* it, which must be freed by the caller. If there is no data, *datap is set to
* NULL.
@@ -105,16 +114,25 @@ vfu_recv_alloc(int sock, struct vfio_user_header *hdr, bool is_reply,
* iovecs array should leave the first entry empty, as it will be used for the
* header.
*
- * If specified, the given fds are sent to the other side. @hdr is filled with
- * the reply header if non-NULL.
+ * If specified, the given @send_fds are sent to the other side. @hdr is filled
+ * with the reply header if non-NULL.
+ *
+ * @recv_fds and @recv_fd_count are used to receive file descriptors.
+ * If @recv_fd_count is NULL then @recv_fds is ignored and no file descriptors
+ * are received. If @recv_fd_count is non-NULL then it contains the number of
+ * file descriptors that can be stored in @recv_fds, in which case @recv_fds
+ * must point to sufficient memory. On return, @recv_fd_count contains the
+ * number of file descriptors actually received, which does not exceed the
+ * original value of @recv_fd_count.
*/
int
vfu_msg_iovec(int sock, uint16_t msg_id,
enum vfio_user_command cmd,
struct iovec *iovecs, size_t nr_iovecs,
- int *send_fds, size_t fd_count,
+ int *send_fds, size_t send_fd_count,
struct vfio_user_header *hdr,
- void *recv_data, size_t recv_len);
+ void *recv_data, size_t recv_len,
+ int *recv_fds, size_t *recv_fd_count);
/*
* Send and receive a message to the other end. @hdr is filled with the reply
@@ -127,6 +145,19 @@ vfu_msg(int sock, uint16_t msg_id,
struct vfio_user_header *hdr,
void *recv_data, size_t recv_len);
+/*
+ * Same as vfu_msg except that file descriptors can be received, see
+ * vfu_msg_iovec for the semantics of @recv_fds and @recv_fd_count.
+ */
+int
+vfu_msg_fds(int sock, uint16_t msg_id,
+ enum vfio_user_command cmd,
+ void *send_data, size_t send_len,
+ struct vfio_user_header *hdr,
+ void *recv_data, size_t recv_len,
+ int *recv_fds, size_t *recv_fd_count);
+
+
#endif /* LIB_VFIO_USER_TRAN_SOCK_H */
/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/samples/client.c b/samples/client.c
index 3e3ba54..576815d 100644
--- a/samples/client.c
+++ b/samples/client.c
@@ -207,10 +207,10 @@ send_device_reset(int sock)
/* returns whether a VFIO migration capability is found */
static bool
-get_region_vfio_caps(struct vfio_info_cap_header *header)
+get_region_vfio_caps(struct vfio_info_cap_header *header,
+ struct vfio_region_info_cap_sparse_mmap *sparse)
{
struct vfio_region_info_cap_type *type;
- struct vfio_region_info_cap_sparse_mmap *sparse;
unsigned int i;
bool migr = false;
@@ -247,13 +247,12 @@ get_region_vfio_caps(struct vfio_info_cap_header *header)
}
static void
-do_get_device_region_info(int sock, struct vfio_region_info *region_info)
+do_get_device_region_info(int sock, struct vfio_region_info *region_info,
+ int *fds, size_t *nr_fds)
{
- int ret = vfu_msg(sock, 0,
- VFIO_USER_DEVICE_GET_REGION_INFO,
- region_info, region_info->argsz,
- NULL,
- region_info, region_info->argsz);
+ int ret = vfu_msg_fds(sock, 0, VFIO_USER_DEVICE_GET_REGION_INFO,
+ region_info, region_info->argsz, NULL,
+ region_info, region_info->argsz, fds, nr_fds);
if (ret < 0) {
errx(EXIT_FAILURE, "failed to get device region info: %s",
strerror(-ret));
@@ -266,19 +265,24 @@ get_device_region_info(int sock, uint32_t index)
struct vfio_region_info *region_info;
size_t cap_sz;
size_t size = sizeof(struct vfio_region_info);
+ size_t nr_fds = CLIENT_MAX_FDS;
+ int fds[nr_fds];
region_info = alloca(size);
region_info->argsz = size;
region_info->index = index;
- do_get_device_region_info(sock, region_info);
+ do_get_device_region_info(sock, region_info, NULL, 0);
if (region_info->argsz > size) {
size = region_info->size;
region_info = alloca(size);
region_info->argsz = size;
region_info->index = index;
- do_get_device_region_info(sock, region_info);
+ do_get_device_region_info(sock, region_info, fds, &nr_fds);
assert(region_info->size == size);
+ assert(nr_fds == 2);
+ assert(fds[0] >= 0);
+ assert(fds[1] >= 0);
}
cap_sz = region_info->argsz - sizeof(struct vfio_region_info);
@@ -286,7 +290,25 @@ get_device_region_info(int sock, uint32_t index)
"cap_sz %lu\n", __func__, index, region_info->offset,
region_info->flags, region_info->size, cap_sz);
if (cap_sz) {
- if (get_region_vfio_caps((struct vfio_info_cap_header*)(region_info + 1))) {
+ struct vfio_region_info_cap_sparse_mmap *sparse = NULL;
+ if (get_region_vfio_caps((struct vfio_info_cap_header*)(region_info + 1),
+ sparse)) {
+ if (sparse != NULL) {
+ size_t i;
+ assert(nr_fds == 2);
+ assert(sparse->nr_areas == 2);
+ for (i = 0; i < sparse->nr_areas; i++) {
+ void *addr = mmap(NULL, sparse->areas[i].size,
+ PROT_READ | PROT_WRITE, MAP_SHARED,
+ fds[i], sparse->areas[i].offset);
+ if (addr == MAP_FAILED) {
+ err(EXIT_FAILURE,
+ "failed to mmap sparse region %lu (%#llx-%#llx)",
+ i, sparse->areas[i].offset,
+ sparse->areas[i].offset + sparse->areas[i].size - 1);
+ }
+ }
+ }
return true;
}
}
@@ -386,7 +408,7 @@ configure_irqs(int sock)
ret = vfu_msg_iovec(sock, msg_id, VFIO_USER_DEVICE_SET_IRQS,
iovecs, ARRAY_SIZE(iovecs),
&irq_fd, 1,
- NULL, NULL, 0);
+ NULL, NULL, 0, NULL, 0);
if (ret < 0) {
errx(EXIT_FAILURE, "failed to send configure IRQs message: %s",
@@ -435,7 +457,7 @@ access_region(int sock, int region, bool is_write, uint64_t offset,
ret = vfu_msg_iovec(sock, 0, op,
send_iovecs, nr_send_iovecs,
NULL, 0, NULL,
- &recv_data, recv_data_len);
+ &recv_data, recv_data_len, NULL, 0);
if (ret != 0) {
warnx("failed to %s region %d %#lx-%#lx: %s",
is_write ? "write to" : "read from", region, offset,
@@ -475,8 +497,7 @@ wait_for_irqs(int sock, int irq_fd)
printf("INTx triggered!\n");
size = sizeof(vfio_user_irq_info);
- ret = vfu_recv(sock, &hdr, false, &msg_id,
- &vfio_user_irq_info, &size);
+ ret = vfu_recv(sock, &hdr, false, &msg_id, &vfio_user_irq_info, &size);
if (ret < 0) {
errx(EXIT_FAILURE, "failed to receive IRQ message: %s",
strerror(-ret));
@@ -664,7 +685,7 @@ get_dirty_bitmaps(int sock, struct vfio_user_dma_region *dma_regions,
ret = vfu_msg_iovec(sock, 0, VFIO_USER_DIRTY_PAGES,
iovecs, ARRAY_SIZE(iovecs),
NULL, 0,
- &hdr, data, ARRAY_SIZE(data));
+ &hdr, data, ARRAY_SIZE(data), NULL, 0);
if (ret != 0) {
errx(EXIT_FAILURE, "failed to start dirty page logging: %s",
strerror(-ret));
@@ -903,7 +924,7 @@ map_dma_regions(int sock, int max_fds, struct vfio_user_dma_region *dma_regions,
ret = vfu_msg_iovec(sock, i, VFIO_USER_DMA_MAP,
iovecs, ARRAY_SIZE(iovecs),
dma_region_fds + (i * max_fds), max_fds,
- NULL, NULL, 0);
+ NULL, NULL, 0, NULL, 0);
if (ret < 0) {
errx(EXIT_FAILURE, "failed to map DMA regions: %s", strerror(-ret));
}
diff --git a/samples/gpio-pci-idio-16.c b/samples/gpio-pci-idio-16.c
index da7c4c0..e31db6d 100644
--- a/samples/gpio-pci-idio-16.c
+++ b/samples/gpio-pci-idio-16.c
@@ -122,7 +122,7 @@ main(int argc, char *argv[])
}
ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR2_REGION_IDX, 0x100,
- &bar2_access, VFU_REGION_FLAG_RW, NULL, 0, NULL);
+ &bar2_access, VFU_REGION_FLAG_RW, NULL, 0, -1);
if (ret < 0) {
fprintf(stderr, "failed to setup region\n");
goto out;
diff --git a/samples/server.c b/samples/server.c
index a1bedac..7860f86 100644
--- a/samples/server.c
+++ b/samples/server.c
@@ -232,14 +232,6 @@ static void do_dma_io(vfu_ctx_t *vfu_ctx, struct server_data *server_data)
}
}
-unsigned long map_area(UNUSED void *pvt, UNUSED unsigned long off,
- UNUSED unsigned long len)
-{
- assert(false);
-
- return 0;
-}
-
static int device_reset(UNUSED void *pvt)
{
printf("device reset callback\n");
@@ -386,6 +378,7 @@ int main(int argc, char *argv[])
vfu_pci_hdr_id_t id = {.raw = 0xdeadbeef};
vfu_pci_hdr_ss_t ss = {.raw = 0xcafebabe};
vfu_pci_hdr_cc_t cc = {.pi = 0xab, .scc = 0xcd, .bcc = 0xef};
+ FILE *fp;
while ((opt = getopt(argc, argv, "v")) != -1) {
switch (opt) {
@@ -431,7 +424,7 @@ int main(int argc, char *argv[])
}
ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX, sizeof(time_t),
- &bar0_access, VFU_REGION_FLAG_RW, NULL, 0, NULL);
+ &bar0_access, VFU_REGION_FLAG_RW, NULL, 0, -1);
if (ret < 0) {
err(EXIT_FAILURE, "failed to setup BAR0 region");
}
@@ -440,13 +433,19 @@ int main(int argc, char *argv[])
* Setup BAR1 to be 3 pages in size where only the first and the last pages
* are mappable.
*/
+ if ((fp = tmpfile()) == NULL) {
+ err(EXIT_FAILURE, "failed to create BAR1 file");
+ }
+ if (ftruncate(fileno(fp), 0x3000) == -1) {
+ err(EXIT_FAILURE, "failed to truncate BAR1 file");
+ }
struct iovec mmap_areas[] = {
{ .iov_base = (void*)0, .iov_len = 0x1000 },
{ .iov_base = (void*)0x2000, .iov_len = 0x1000 }
};
ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR1_REGION_IDX,
0x3000, &bar1_access, VFU_REGION_FLAG_RW,
- mmap_areas, 3, map_area);
+ mmap_areas, 2, fileno(fp));
if (ret < 0) {
err(EXIT_FAILURE, "failed to setup BAR1 region");
}
diff --git a/test/mocks.c b/test/mocks.c
index 204ae75..faa8ed4 100644
--- a/test/mocks.c
+++ b/test/mocks.c
@@ -100,8 +100,8 @@ __wrap_get_next_command(vfu_ctx_t *vfu_ctx, struct vfio_user_header *hdr,
int
__wrap_exec_command(vfu_ctx_t *vfu_ctx, struct vfio_user_header *hdr,
- size_t size, int *fds, size_t *nr_fds,
- struct iovec *_iovecs, struct iovec **iovecs,
+ size_t size, int *fds, size_t *nr_fds, size_t **fds_out,
+ int *nr_fds_out, struct iovec *_iovecs, struct iovec **iovecs,
size_t *nr_iovecs, bool *free_iovec_data)
{
check_expected(vfu_ctx);
@@ -109,6 +109,8 @@ __wrap_exec_command(vfu_ctx_t *vfu_ctx, struct vfio_user_header *hdr,
check_expected(size);
check_expected(fds);
check_expected(nr_fds);
+ check_expected(fds_out);
+ check_expected(nr_fds_out);
check_expected(_iovecs);
check_expected(iovecs);
check_expected(nr_iovecs);
@@ -144,7 +146,10 @@ __wrap_vfu_send_iovec(int sock, uint16_t msg_id, bool is_reply,
+/*
+ * Wrapper for free(). When is_patched(free) is false the call is forwarded to
+ * __real_free(); otherwise @ptr is only handed to check_expected() and
+ * nothing is released.
+ */
void
__wrap_free(void *ptr)
{
- assert(false);
+ if (!is_patched(free)) {
+ __real_free(ptr);
+ return;
+ }
check_expected(ptr);
}
diff --git a/test/unit-tests.c b/test/unit-tests.c
index c3946b0..4d28db7 100644
--- a/test/unit-tests.c
+++ b/test/unit-tests.c
@@ -302,6 +302,8 @@ test_process_command_free_passed_fds(void **state __attribute__((unused)))
expect_value(__wrap_exec_command, size, 0x0000beef);
expect_check(__wrap_exec_command, fds, &set_fds, &exec_command);
expect_any(__wrap_exec_command, nr_fds);
+ expect_any(__wrap_exec_command, fds_out);
+ expect_any(__wrap_exec_command, nr_fds_out);
expect_any(__wrap_exec_command, _iovecs);
expect_any(__wrap_exec_command, iovecs);
expect_any(__wrap_exec_command, nr_iovecs);
@@ -405,10 +407,12 @@ test_get_region_info(UNUSED void **state)
},
{
.flags = VFU_REGION_FLAG_RW,
- .size = 0xdeadbeef
+ .size = 0xdeadbeef,
+ .fd = 0x12345
}
};
vfu_ctx_t vfu_ctx = {
+ .client_max_fds = 1,
.nr_regions = 2,
.reg_info = reg_info
};
@@ -416,23 +420,32 @@ test_get_region_info(UNUSED void **state)
uint32_t argsz = 0;
struct vfio_region_info *vfio_reg;
struct vfu_sparse_mmap_areas *mmap_areas = alloca(sizeof(struct vfu_sparse_mmap_areas) + sizeof(struct iovec));
-
+ int *fds = NULL;
+ size_t nr_fds;
+
/* bad argsz */
- assert_int_equal(-EINVAL, dev_get_reginfo(&vfu_ctx, index, argsz, &vfio_reg));
+ assert_int_equal(-EINVAL,
+ dev_get_reginfo(&vfu_ctx, index, argsz, &vfio_reg,
+ &fds, &nr_fds));
/* bad region */
index = vfu_ctx.nr_regions;
argsz = sizeof(struct vfio_region_info);
- assert_int_equal(-EINVAL, dev_get_reginfo(&vfu_ctx, index, argsz, &vfio_reg));
+ assert_int_equal(-EINVAL,
+ dev_get_reginfo(&vfu_ctx, index, argsz, &vfio_reg,
+ &fds, &nr_fds));
/* no region caps */
index = 1;
- assert_int_equal(0, dev_get_reginfo(&vfu_ctx, index, argsz, &vfio_reg));
+ assert_int_equal(0,
+ dev_get_reginfo(&vfu_ctx, index, argsz, &vfio_reg,
+ &fds, &nr_fds));
assert_int_equal(sizeof(struct vfio_region_info), vfio_reg->argsz);
assert_int_equal(VFU_REGION_FLAG_RW, vfio_reg->flags);
assert_int_equal(1, vfio_reg->index);
assert_int_equal(0x10000000000, region_to_offset(vfio_reg->index));
assert_int_equal(0xdeadbeef, vfio_reg->size);
+ assert_int_equal(0, nr_fds);
/* regions caps (sparse mmap) but argsz too small */
mmap_areas->nr_mmap_areas = 1;
@@ -440,20 +453,28 @@ test_get_region_info(UNUSED void **state)
mmap_areas->areas[0].iov_len = 0x0d15ea5e;
vfu_ctx.reg_info[1].mmap_areas = mmap_areas;
vfu_ctx.reg_info[1].flags |= VFIO_REGION_INFO_FLAG_MMAP;
- assert_int_equal(0, dev_get_reginfo(&vfu_ctx, index, argsz, &vfio_reg));
+ assert_int_equal(0,
+ dev_get_reginfo(&vfu_ctx, index, argsz, &vfio_reg,
+ &fds, &nr_fds));
assert_int_equal(argsz + sizeof(struct vfio_region_info_cap_sparse_mmap) + sizeof(struct vfio_region_sparse_mmap_area),
vfio_reg->argsz);
assert_int_equal(VFU_REGION_FLAG_RW | VFIO_REGION_INFO_FLAG_MMAP | VFIO_REGION_INFO_FLAG_CAPS,
vfio_reg->flags);
+ assert_int_equal(0, nr_fds);
/* region caps and argsz large enough */
argsz += sizeof(struct vfio_region_info_cap_sparse_mmap) + sizeof(struct vfio_region_sparse_mmap_area);
- assert_int_equal(0, dev_get_reginfo(&vfu_ctx, index, argsz, &vfio_reg));
+ assert_int_equal(0,
+ dev_get_reginfo(&vfu_ctx, index, argsz, &vfio_reg,
+ &fds, &nr_fds));
struct vfio_region_info_cap_sparse_mmap *sparse = (struct vfio_region_info_cap_sparse_mmap*)(vfio_reg + 1);
assert_int_equal(VFIO_REGION_INFO_CAP_SPARSE_MMAP, sparse->header.id);
assert_int_equal(1, sparse->header.version);
assert_int_equal(0, sparse->header.next);
assert_int_equal(1, sparse->nr_areas);
+ assert_non_null(fds);
+ assert_int_equal(1, nr_fds);
+ assert_int_equal(0x12345, fds[0]);
/* FIXME add check for migration region and for multiple sparse areas */
}
@@ -559,6 +580,45 @@ test_device_get_info(void **state __attribute__((unused)))
}
/*
+ * Performs various checks when adding sparse memory regions.
+ */
+static void
+test_setup_sparse_region(void **state __attribute__((unused)))
+{
+    /*
+     * Zero-initialized so that any field vfu_setup_region() does not assign
+     * holds a defined value rather than stack garbage.
+     */
+    vfu_reg_info_t reg_info = { 0 };
+    vfu_ctx_t vfu_ctx = { .reg_info = &reg_info };
+    /* Two adjacent 4K areas that together cover a 0x2000 byte region. */
+    struct iovec mmap_areas[2] = {
+        [0] = {
+            .iov_base = (void*)0x0,
+            .iov_len = 0x1000
+        },
+        [1] = {
+            .iov_base = (void*)0x1000,
+            .iov_len = 0x1000
+        }
+    };
+
+    /* bad fd */
+    assert_int_equal(-1,
+                     vfu_setup_region(&vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX,
+                                      0x2000, NULL, 0, mmap_areas, 2, -1));
+    assert_int_equal(EBADF, errno);
+
+    /* sparse region exceeds region size (second area ends one byte past it) */
+    mmap_areas[1].iov_len = 0x1001;
+    assert_int_equal(-1,
+                     vfu_setup_region(&vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX,
+                                      0x2000, NULL, 0, mmap_areas, 2, 0));
+    assert_int_equal(EINVAL, errno);
+
+    /* sparse region within region size */
+    mmap_areas[1].iov_len = 0x1000;
+    assert_int_equal(0,
+                     vfu_setup_region(&vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX,
+                                      0x2000, NULL, 0, mmap_areas, 2, 0));
+}
+
+/*
* FIXME we shouldn't have to specify a setup function explicitly for each unit
* test, cmocka should provide that. E.g. cmocka_run_group_tests enables us to
* run a function before/after ALL unit tests have finished, we can extend it
@@ -587,7 +647,8 @@ int main(void)
cmocka_unit_test_setup(test_vfu_ctx_create, setup),
cmocka_unit_test_setup(test_pci_caps, setup),
cmocka_unit_test_setup(test_device_get_info, setup),
- cmocka_unit_test_setup(test_get_region_info, setup)
+ cmocka_unit_test_setup(test_get_region_info, setup),
+ cmocka_unit_test_setup(test_setup_sparse_region, setup)
};
return cmocka_run_group_tests(tests, NULL, NULL);