aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
author Thanos Makatos <thanos.makatos@nutanix.com> 2020-12-15 14:56:32 +0000
committer GitHub <noreply@github.com> 2020-12-15 14:56:32 +0000
commit 8694149b351e3e125aa8f1740f8d5925e7ec112c (patch)
tree 55fae56b5280eddd8c5c23d8c846fe36425e7ba2 /lib
parent 3148a598f4ac0711d2f46ef9186e51a4779f1bf1 (diff)
download libvfio-user-8694149b351e3e125aa8f1740f8d5925e7ec112c.zip
libvfio-user-8694149b351e3e125aa8f1740f8d5925e7ec112c.tar.gz
libvfio-user-8694149b351e3e125aa8f1740f8d5925e7ec112c.tar.bz2
send file descriptors for sparse areas in get region info (#201)
Signed-off-by: Thanos Makatos <thanos.makatos@nutanix.com>
Diffstat (limited to 'lib')
-rw-r--r-- lib/libvfio-user.c 83
-rw-r--r-- lib/private.h 11
-rw-r--r-- lib/tran_sock.c 129
-rw-r--r-- lib/tran_sock.h 39
4 files changed, 184 insertions, 78 deletions
diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c
index fab68d5..846a70f 100644
--- a/lib/libvfio-user.c
+++ b/lib/libvfio-user.c
@@ -107,9 +107,9 @@ get_vfio_caps_size(bool is_migr_reg, struct vfu_sparse_mmap_areas *m)
* Sparse mmap information stays after struct vfio_region_info and cap_offset
* points accordingly.
*/
-static void
+static int
dev_get_caps(vfu_ctx_t *vfu_ctx, vfu_reg_info_t *vfu_reg, bool is_migr_reg,
- struct vfio_region_info *vfio_reg)
+ struct vfio_region_info *vfio_reg, int **fds, size_t *nr_fds)
{
struct vfio_info_cap_header *header;
struct vfio_region_info_cap_type *type = NULL;
@@ -118,6 +118,8 @@ dev_get_caps(vfu_ctx_t *vfu_ctx, vfu_reg_info_t *vfu_reg, bool is_migr_reg,
assert(vfu_ctx != NULL);
assert(vfio_reg != NULL);
+ assert(fds != NULL);
+ assert(nr_fds != NULL);
header = (struct vfio_info_cap_header*)(vfio_reg + 1);
@@ -140,13 +142,25 @@ dev_get_caps(vfu_ctx_t *vfu_ctx, vfu_reg_info_t *vfu_reg, bool is_migr_reg,
vfio_reg->cap_offset = sizeof(struct vfio_region_info);
sparse = (struct vfio_region_info_cap_sparse_mmap*)header;
}
+
+ /*
+ * FIXME need to figure out how to break message into smaller messages
+ * so that we don't exceed client_max_fds
+ */
+ assert(nr_mmap_areas <= vfu_ctx->client_max_fds);
+
+ *fds = malloc(nr_mmap_areas * sizeof(int));
+ if (*fds == NULL) {
+ return -ENOMEM;
+ }
sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
sparse->header.version = 1;
sparse->header.next = 0;
- sparse->nr_areas = nr_mmap_areas;
+ sparse->nr_areas = *nr_fds = nr_mmap_areas;
mmap_areas = vfu_reg->mmap_areas;
for (i = 0; i < nr_mmap_areas; i++) {
+ (*fds)[i] = vfu_reg->fd;
sparse->areas[i].offset = (__u64)mmap_areas->areas[i].iov_base;
sparse->areas[i].size = mmap_areas->areas[i].iov_len;
vfu_log(vfu_ctx, LOG_DEBUG, "%s: area %d %#llx-%#llx", __func__,
@@ -154,6 +168,7 @@ dev_get_caps(vfu_ctx_t *vfu_ctx, vfu_reg_info_t *vfu_reg, bool is_migr_reg,
sparse->areas[i].offset + sparse->areas[i].size);
}
}
+ return 0;
}
#define VFU_REGION_SHIFT 40
@@ -207,7 +222,7 @@ is_migr_reg(vfu_ctx_t *vfu_ctx, int index)
long
dev_get_reginfo(vfu_ctx_t *vfu_ctx, uint32_t index, uint32_t argsz,
- struct vfio_region_info **vfio_reg)
+ struct vfio_region_info **vfio_reg, int **fds, size_t *nr_fds)
{
vfu_reg_info_t *vfu_reg;
size_t caps_size;
@@ -243,12 +258,14 @@ dev_get_reginfo(vfu_ctx_t *vfu_ctx, uint32_t index, uint32_t argsz,
(*vfio_reg)->offset = region_to_offset((*vfio_reg)->index);
(*vfio_reg)->size = vfu_reg->size;
+ *nr_fds = 0;
if (caps_size > 0) {
if (vfu_reg->mmap_areas != NULL) {
(*vfio_reg)->flags |= VFIO_REGION_INFO_FLAG_CAPS;
}
if (argsz >= (*vfio_reg)->argsz) {
- dev_get_caps(vfu_ctx, vfu_reg, is_migr_reg(vfu_ctx, index), *vfio_reg);
+ dev_get_caps(vfu_ctx, vfu_reg, is_migr_reg(vfu_ctx, index),
+ *vfio_reg, fds, nr_fds);
}
}
@@ -482,14 +499,15 @@ vfu_access(vfu_ctx_t *vfu_ctx, bool is_write, char *rwbuf, uint32_t count,
static int
handle_device_get_region_info(vfu_ctx_t *vfu_ctx, uint32_t size,
struct vfio_region_info *reg_info_in,
- struct vfio_region_info **reg_info_out)
+ struct vfio_region_info **reg_info_out,
+ int **fds, size_t *nr_fds)
{
if (size < sizeof(*reg_info_in)) {
return -EINVAL;
}
return dev_get_reginfo(vfu_ctx, reg_info_in->index, reg_info_in->argsz,
- reg_info_out);
+ reg_info_out, fds, nr_fds);
}
int
@@ -879,7 +897,7 @@ UNIT_TEST_SYMBOL(get_next_command);
int
exec_command(vfu_ctx_t *vfu_ctx, struct vfio_user_header *hdr, size_t size,
- int *fds, size_t nr_fds,
+ int *fds, size_t nr_fds, int **fds_out, size_t *nr_fds_out,
struct iovec *_iovecs, struct iovec **iovecs, size_t *nr_iovecs,
bool *free_iovec_data)
{
@@ -958,7 +976,8 @@ exec_command(vfu_ctx_t *vfu_ctx, struct vfio_user_header *hdr, size_t size,
break;
case VFIO_USER_DEVICE_GET_REGION_INFO:
ret = handle_device_get_region_info(vfu_ctx, hdr->msg_size, cmd_data,
- &dev_reg_info);
+ &dev_reg_info, fds_out,
+ nr_fds_out);
if (ret == 0) {
_iovecs[1].iov_base = dev_reg_info;
_iovecs[1].iov_len = hdr->msg_size;
@@ -1024,8 +1043,9 @@ process_request(vfu_ctx_t *vfu_ctx)
{
struct vfio_user_header hdr = { 0, };
int ret;
- int *fds = NULL;
+ int *fds = NULL, *fds_out = NULL;
size_t nr_fds, i;
+ size_t nr_fds_out = 0;
struct iovec _iovecs[2] = { { 0, } };
struct iovec *iovecs = NULL;
size_t nr_iovecs = 0;
@@ -1054,8 +1074,8 @@ process_request(vfu_ctx_t *vfu_ctx)
return ret;
}
- ret = exec_command(vfu_ctx, &hdr, ret, fds, nr_fds, _iovecs, &iovecs,
- &nr_iovecs, &free_iovec_data);
+ ret = exec_command(vfu_ctx, &hdr, ret, fds, nr_fds, &fds_out, &nr_fds_out,
+ _iovecs, &iovecs, &nr_iovecs, &free_iovec_data);
for (i = 0; i < nr_fds; i++) {
if (fds[i] != -1) {
@@ -1081,7 +1101,7 @@ process_request(vfu_ctx_t *vfu_ctx)
if (!(hdr.flags.no_reply)) {
// FIXME: SPEC: should the reply include the command? I'd say yes?
ret = vfu_send_iovec(vfu_ctx->conn_fd, hdr.msg_id, true,
- 0, iovecs, nr_iovecs, NULL, 0, -ret);
+ 0, iovecs, nr_iovecs, fds_out, nr_fds_out, -ret);
if (unlikely(ret < 0)) {
vfu_log(vfu_ctx, LOG_ERR, "failed to complete command: %s",
strerror(-ret));
@@ -1465,11 +1485,37 @@ copy_sparse_mmap_areas(vfu_reg_info_t *reg_info,
return 0;
}
+static int
+setup_sparse_areas(vfu_reg_info_t *r, struct iovec *mmap_areas,
+ uint32_t nr_mmap_areas, int fd)
+{
+ int ret, i;
+
+ assert(r != NULL);
+
+ if (fd == -1) {
+ return -EBADF;
+ }
+ r->fd = fd;
+ ret = copy_sparse_mmap_areas(r, mmap_areas, nr_mmap_areas);
+ if (ret < 0) {
+ return ret;
+ }
+ for (i = 0; i < r->mmap_areas->nr_mmap_areas; i++) {
+ struct iovec *a = &r->mmap_areas->areas[i];
+ if ((unsigned long long)a->iov_base + a->iov_len > r->size) {
+ free(r->mmap_areas);
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
int
vfu_setup_region(vfu_ctx_t *vfu_ctx, int region_idx, size_t size,
vfu_region_access_cb_t *region_access, int flags,
struct iovec *mmap_areas, uint32_t nr_mmap_areas,
- vfu_map_region_cb_t *map)
+ int fd)
{
int ret;
@@ -1487,12 +1533,9 @@ vfu_setup_region(vfu_ctx_t *vfu_ctx, int region_idx, size_t size,
vfu_ctx->reg_info[region_idx].size = size;
vfu_ctx->reg_info[region_idx].fn = region_access;
- if (map != NULL) {
- vfu_ctx->reg_info[region_idx].map = map;
- }
- if (mmap_areas) {
- ret = copy_sparse_mmap_areas(&vfu_ctx->reg_info[region_idx],
- mmap_areas, nr_mmap_areas);
+ if (nr_mmap_areas > 0) {
+ ret = setup_sparse_areas(&vfu_ctx->reg_info[region_idx], mmap_areas,
+ nr_mmap_areas, fd);
if (ret < 0) {
return ERROR(-ret);
}
diff --git a/lib/private.h b/lib/private.h
index 7860ec2..968271d 100644
--- a/lib/private.h
+++ b/lib/private.h
@@ -91,12 +91,8 @@ typedef struct {
*/
vfu_region_access_cb_t *fn;
- /*
- * Callback function that is called when the region is memory mapped.
- * Required if VFU_REGION_FLAG_MEM is set, otherwise ignored.
- */
- vfu_map_region_cb_t *map;
struct vfu_sparse_mmap_areas *mmap_areas; /* sparse mmap areas */
+ int fd;
} vfu_reg_info_t;
struct pci_dev {
@@ -158,7 +154,7 @@ get_next_command(vfu_ctx_t *vfu_ctx, struct vfio_user_header *hdr, int *fds,
int
exec_command(vfu_ctx_t *vfu_ctx, struct vfio_user_header *hdr, size_t size,
- int *fds, size_t nr_fds,
+ int *fds, size_t nr_fds, int **fds_out, size_t *nr_fds_out,
struct iovec *_iovecs, struct iovec **iovecs, size_t *nr_iovecs,
bool *free_iovec_data);
@@ -171,9 +167,10 @@ consume_fd(int *fds, size_t nr_fds, size_t index);
int
handle_device_get_info(vfu_ctx_t *vfu_ctx, uint32_t size,
struct vfio_device_info *dev_info);
+
long
dev_get_reginfo(vfu_ctx_t *vfu_ctx, uint32_t index, uint32_t argsz,
- struct vfio_region_info **vfio_reg);
+ struct vfio_region_info **vfio_reg, int **fds, size_t *nr_fds);
#endif /* LIB_VFIO_USER_PRIVATE_H */
diff --git a/lib/tran_sock.c b/lib/tran_sock.c
index f96784d..dd682db 100644
--- a/lib/tran_sock.c
+++ b/lib/tran_sock.c
@@ -214,6 +214,52 @@ vfu_send_error(int sock, uint16_t msg_id,
return vfu_send_iovec(sock, msg_id, true, cmd, NULL, 0, NULL, 0, error);
}
+static int
+get_msg(void *data, size_t len, int *fds, size_t *nr_fds, int sock_fd,
+ int sock_flags)
+{
+ int ret;
+ struct iovec iov = {.iov_base = data, .iov_len = len};
+ struct msghdr msg = {.msg_iov = &iov, .msg_iovlen = 1};
+ struct cmsghdr *cmsg;
+
+ if (nr_fds != NULL && *nr_fds > 0) {
+ assert(fds != NULL);
+ msg.msg_controllen = CMSG_SPACE(sizeof(int) * *nr_fds);
+ msg.msg_control = alloca(msg.msg_controllen);
+ *nr_fds = 0;
+ }
+
+ ret = recvmsg(sock_fd, &msg, sock_flags);
+ if (ret == -1) {
+ return -errno;
+ }
+
+ if (msg.msg_flags & MSG_CTRUNC || msg.msg_flags & MSG_TRUNC) {
+ return -EFAULT;
+ }
+
+ if (nr_fds != NULL) {
+ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+ if (cmsg->cmsg_level != SOL_SOCKET || cmsg->cmsg_type != SCM_RIGHTS) {
+ continue;
+ }
+ if (cmsg->cmsg_len < CMSG_LEN(sizeof(int))) {
+ return -EINVAL;
+ }
+ int size = cmsg->cmsg_len - CMSG_LEN(0);
+ if (size % sizeof(int) != 0) {
+ return -EINVAL;
+ }
+ *nr_fds = (int)(size / sizeof(int));
+ memcpy(fds, CMSG_DATA(cmsg), *nr_fds * sizeof(int));
+ break;
+ }
+ }
+
+ return ret;
+}
+
/*
* Receive a vfio-user message. If "len" is set to non-zero, the message should
* include data of that length, which is stored in the pre-allocated "data"
@@ -224,14 +270,15 @@ vfu_send_error(int sock, uint16_t msg_id,
* better.
*/
int
-vfu_recv(int sock, struct vfio_user_header *hdr, bool is_reply,
- uint16_t *msg_id, void *data, size_t *len)
+vfu_recv_fds(int sock, struct vfio_user_header *hdr, bool is_reply,
+ uint16_t *msg_id, void *data, size_t *len, int *fds,
+ size_t *nr_fds)
{
int ret;
/* FIXME if ret == -1 then fcntl can overwrite recv's errno */
- ret = recv_blocking(sock, hdr, sizeof(*hdr), 0);
+ ret = get_msg(hdr, sizeof *hdr, fds, nr_fds, sock, 0);
if (ret == -1) {
return -errno;
}
@@ -275,6 +322,13 @@ vfu_recv(int sock, struct vfio_user_header *hdr, bool is_reply,
return 0;
}
+int
+vfu_recv(int sock, struct vfio_user_header *hdr, bool is_reply,
+ uint16_t *msg_id, void *data, size_t *len)
+{
+ return vfu_recv_fds(sock, hdr, is_reply, msg_id, data, len, NULL, NULL);
+}
+
/*
* Like vfu_recv(), but will automatically allocate reply data.
*
@@ -333,26 +387,29 @@ vfu_recv_alloc(int sock, struct vfio_user_header *hdr, bool is_reply,
int
vfu_msg_iovec(int sock, uint16_t msg_id, enum vfio_user_command cmd,
struct iovec *iovecs, size_t nr_iovecs,
- int *send_fds, size_t fd_count,
+ int *send_fds, size_t send_fd_count,
struct vfio_user_header *hdr,
- void *recv_data, size_t recv_len)
+ void *recv_data, size_t recv_len,
+ int *recv_fds, size_t *recv_fd_count)
{
int ret = vfu_send_iovec(sock, msg_id, false, cmd, iovecs, nr_iovecs,
- send_fds, fd_count, 0);
+ send_fds, send_fd_count, 0);
if (ret < 0) {
return ret;
}
if (hdr == NULL) {
hdr = alloca(sizeof *hdr);
}
- return vfu_recv(sock, hdr, true, &msg_id, recv_data, &recv_len);
+ return vfu_recv_fds(sock, hdr, true, &msg_id, recv_data, &recv_len,
+ recv_fds, recv_fd_count);
}
int
-vfu_msg(int sock, uint16_t msg_id, enum vfio_user_command cmd,
- void *send_data, size_t send_len,
- struct vfio_user_header *hdr,
- void *recv_data, size_t recv_len)
+vfu_msg_fds(int sock, uint16_t msg_id, enum vfio_user_command cmd,
+ void *send_data, size_t send_len,
+ struct vfio_user_header *hdr,
+ void *recv_data, size_t recv_len, int *recv_fds,
+ size_t *recv_fd_count)
{
/* [0] is for the header. */
struct iovec iovecs[2] = {
@@ -362,7 +419,18 @@ vfu_msg(int sock, uint16_t msg_id, enum vfio_user_command cmd,
}
};
return vfu_msg_iovec(sock, msg_id, cmd, iovecs, ARRAY_SIZE(iovecs),
- NULL, 0, hdr, recv_data, recv_len);
+ NULL, 0, hdr, recv_data, recv_len, recv_fds,
+ recv_fd_count);
+}
+
+int
+vfu_msg(int sock, uint16_t msg_id, enum vfio_user_command cmd,
+ void *send_data, size_t send_len,
+ struct vfio_user_header *hdr,
+ void *recv_data, size_t recv_len)
+{
+ return vfu_msg_fds(sock, msg_id, cmd, send_data, send_len, hdr, recv_data,
+ recv_len, NULL, NULL);
}
/*
@@ -650,15 +718,7 @@ static int
get_request_sock(vfu_ctx_t *vfu_ctx, struct vfio_user_header *hdr,
int *fds, size_t *nr_fds)
{
- int ret, sock_flags = 0;
- struct iovec iov = {.iov_base = hdr, .iov_len = sizeof *hdr};
- struct msghdr msg = {.msg_iov = &iov, .msg_iovlen = 1};
- struct cmsghdr *cmsg;
-
- msg.msg_controllen = CMSG_SPACE(sizeof(int) * *nr_fds);
- msg.msg_control = alloca(msg.msg_controllen);
-
- *nr_fds = 0;
+ int sock_flags = 0;
/*
* TODO ideally we should set O_NONBLOCK on the fd so that the syscall is
@@ -668,32 +728,7 @@ get_request_sock(vfu_ctx_t *vfu_ctx, struct vfio_user_header *hdr,
if (vfu_ctx->flags & LIBVFIO_USER_FLAG_ATTACH_NB) {
sock_flags = MSG_DONTWAIT | MSG_WAITALL;
}
- ret = recvmsg(vfu_ctx->conn_fd, &msg, sock_flags);
- if (ret == -1) {
- return -errno;
- }
-
- if (msg.msg_flags & MSG_CTRUNC || msg.msg_flags & MSG_TRUNC) {
- return -EFAULT;
- }
-
- for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
- if (cmsg->cmsg_level != SOL_SOCKET || cmsg->cmsg_type != SCM_RIGHTS) {
- continue;
- }
- if (cmsg->cmsg_len < CMSG_LEN(sizeof(int))) {
- return -EINVAL;
- }
- int size = cmsg->cmsg_len - CMSG_LEN(0);
- if (size % sizeof(int) != 0) {
- return -EINVAL;
- }
- *nr_fds = (int)(size / sizeof(int));
- memcpy(fds, CMSG_DATA(cmsg), *nr_fds * sizeof(int));
- break;
- }
-
- return ret;
+ return get_msg(hdr, sizeof *hdr, fds, nr_fds, vfu_ctx->conn_fd, sock_flags);
}
struct transport_ops sock_transport_ops = {
diff --git a/lib/tran_sock.h b/lib/tran_sock.h
index b764e9b..c4ed30c 100644
--- a/lib/tran_sock.h
+++ b/lib/tran_sock.h
@@ -92,6 +92,15 @@ vfu_recv(int sock, struct vfio_user_header *hdr, bool is_reply,
uint16_t *msg_id, void *data, size_t *len);
/*
+ * Same as vfu_recv except it also receives passed file descriptors. See
+ * vfu_msg_iovec for the semantics of @fds and @nr_fds.
+ */
+int
+vfu_recv_fds(int sock, struct vfio_user_header *hdr, bool is_reply,
+ uint16_t *msg_id, void *data, size_t *len, int *fds,
+ size_t *nr_fds);
+
+/*
* Receive a message from the other end, but automatically allocate a buffer for
* it, which must be freed by the caller. If there is no data, *datap is set to
* NULL.
@@ -105,16 +114,25 @@ vfu_recv_alloc(int sock, struct vfio_user_header *hdr, bool is_reply,
* iovecs array should leave the first entry empty, as it will be used for the
* header.
*
- * If specified, the given fds are sent to the other side. @hdr is filled with
- * the reply header if non-NULL.
+ * If specified, the given @send_fds are sent to the other side. @hdr is filled
+ * with the reply header if non-NULL.
+ *
+ * @recv_fds and @recv_fd_count are used to receive file descriptors.
+ * If @recv_fd_count is NULL then @recv_fds is ignored and no file descriptors
+ * are received. If @recv_fd_count is non-NULL then it contains the number of
+ * file descriptors that can be stored in @recv_fds, in which case @recv_fds
+ * must point to sufficient memory. On return, @recv_fd_count contains the
+ * number of file descriptors actually received, which does not exceed the
+ * original value of @recv_fd_count.
*/
int
vfu_msg_iovec(int sock, uint16_t msg_id,
enum vfio_user_command cmd,
struct iovec *iovecs, size_t nr_iovecs,
- int *send_fds, size_t fd_count,
+ int *send_fds, size_t send_fd_count,
struct vfio_user_header *hdr,
- void *recv_data, size_t recv_len);
+ void *recv_data, size_t recv_len,
+ int *recv_fds, size_t *recv_fd_count);
/*
* Send and receive a message to the other end. @hdr is filled with the reply
@@ -127,6 +145,19 @@ vfu_msg(int sock, uint16_t msg_id,
struct vfio_user_header *hdr,
void *recv_data, size_t recv_len);
+/*
+ * Same as vfu_msg except that file descriptors can be received, see
+ * vfu_msg_iovec for the semantics of @recv_fds and @recv_fd_count.
+ */
+int
+vfu_msg_fds(int sock, uint16_t msg_id,
+ enum vfio_user_command cmd,
+ void *send_data, size_t send_len,
+ struct vfio_user_header *hdr,
+ void *recv_data, size_t recv_len,
+ int *recv_fds, size_t *recv_fd_count);
+
+
#endif /* LIB_VFIO_USER_TRAN_SOCK_H */
/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */