author     John Levon <john.levon@nutanix.com>  2021-06-02 16:08:14 +0100
committer  GitHub <noreply@github.com>          2021-06-02 16:08:14 +0100
commit     57684de8240fce4a277301a86a803842338762af (patch)
tree       ea1066e2ae4de34bd7b77f9fb7a26af40848b52f /lib
parent     b8234a75d9ec2c95cb889c0cef27927f34ad9cbc (diff)
replace max_msg_size with max_data_xfer_size (#541)
The previously specified max_msg_size had one major issue: it implied a
(way too small) limit on the size of dirty bitmaps that could be requested
by a client, and as a result a hard limit on memory region size. It seemed
awkward to attempt to split up an unmap request instead.

Instead, let most requests and replies be limited by their "natural"
limits; for example, the number of booleans in VFIO_USER_SET_IRQS is
limited by MSI-X count.

For the requests that solicit or provide data - that is,
VFIO_USER_DMA_READ/WRITE and VFIO_USER_REGION_READ/WRITE - we negotiate a
new max_data_xfer_size value. These are much easier to split up into
separate requests at the client side, so should not present an
implementation problem. For our server, chunking is implemented in
vfu_dma_read() and vfu_dma_write().

Signed-off-by: John Levon <john.levon@nutanix.com>
Reviewed-by: Swapnil Ingle <swapnil.ingle@nutanix.com>
Reviewed-by: Thanos Makatos <thanos.makatos@nutanix.com>
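To make the client-side splitting concrete, below is a minimal sketch (not
part of this patch) of how a client could break a large
VFIO_USER_REGION_READ into requests bounded by the negotiated
max_data_xfer_size. send_region_read() is a hypothetical helper that issues
a single request and copies the reply payload into the buffer.

    #include <stddef.h>
    #include <stdint.h>

    /* Hypothetical helper: issue one VFIO_USER_REGION_READ of `count` bytes
     * at `offset` in `region` and copy the reply payload into `buf`. */
    int send_region_read(int region, uint64_t offset, void *buf, size_t count);

    static int
    chunked_region_read(int region, uint64_t offset, void *buf, size_t len,
                        size_t max_data_xfer_size)
    {
        size_t done = 0;

        while (done < len) {
            size_t count = len - done;

            /* never ask for more than the server advertised */
            if (count > max_data_xfer_size) {
                count = max_data_xfer_size;
            }

            if (send_region_read(region, offset + done,
                                 (char *)buf + done, count) < 0) {
                return -1;
            }

            done += count;
        }

        return 0;
    }

The server-side equivalent for DMA transfers is the vfu_dma_transfer() loop
added to lib/libvfio-user.c below.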
Diffstat (limited to 'lib')
-rw-r--r--  lib/libvfio-user.c   145
-rw-r--r--  lib/private.h         11
-rw-r--r--  lib/tran_sock.c       38
-rw-r--r--  lib/tran_sock.h        2
4 files changed, 129 insertions(+), 67 deletions(-)
diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c
index 452b15f..62f906a 100644
--- a/lib/libvfio-user.c
+++ b/lib/libvfio-user.c
@@ -252,6 +252,12 @@ is_valid_region_access(vfu_ctx_t *vfu_ctx, size_t size, uint16_t cmd,
return false;
}
+ if (ra->count > SERVER_MAX_DATA_XFER_SIZE) {
+ vfu_log(vfu_ctx, LOG_ERR, "region access count too large (%u)",
+ ra->count);
+ return false;
+ }
+
if (cmd == VFIO_USER_REGION_WRITE && size - sizeof(*ra) != ra->count) {
vfu_log(vfu_ctx, LOG_ERR, "region write count too small: "
"expected %lu, got %u", size - sizeof(*ra), ra->count);
@@ -265,7 +271,6 @@ is_valid_region_access(vfu_ctx_t *vfu_ctx, size_t size, uint16_t cmd,
return false;
}
- // FIXME: ->count unbounded (alloc), except for region size
// FIXME: need to audit later for wraparound
if (ra->offset + ra->count > vfu_ctx->reg_info[index].size) {
vfu_log(vfu_ctx, LOG_ERR, "out of bounds region access %#lx-%#lx "
@@ -891,7 +896,7 @@ is_valid_header(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg)
}
if (msg->hdr.msg_size < sizeof(msg->hdr)) {
- vfu_log(vfu_ctx, LOG_ERR, "msg%#hx: bad size %d in header",
+ vfu_log(vfu_ctx, LOG_ERR, "msg%#hx: bad size %u in header",
msg->hdr.msg_id, msg->hdr.msg_size);
return false;
} else if (msg->hdr.msg_size == sizeof(msg->hdr) &&
@@ -900,6 +905,11 @@ is_valid_header(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg)
msg->hdr.msg_id, msg->hdr.cmd);
return false;
} else if (msg->hdr.msg_size > SERVER_MAX_MSG_SIZE) {
+ /*
+ * We know we can reject this: all normal requests shouldn't need this
+ * amount of space, including VFIO_USER_REGION_WRITE, which should be
+ * bound by max_data_xfer_size.
+ */
vfu_log(vfu_ctx, LOG_ERR, "msg%#hx: size of %u is too large",
msg->hdr.msg_id, msg->hdr.msg_size);
return false;
@@ -1603,82 +1613,105 @@ vfu_unmap_sg(vfu_ctx_t *vfu_ctx, const dma_sg_t *sg, struct iovec *iov, int cnt)
return dma_unmap_sg(vfu_ctx->dma, sg, iov, cnt);
}
-EXPORT int
-vfu_dma_read(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data)
+static int
+vfu_dma_transfer(vfu_ctx_t *vfu_ctx, enum vfio_user_command cmd,
+ dma_sg_t *sg, void *data)
{
- struct vfio_user_dma_region_access *dma_recv;
- struct vfio_user_dma_region_access dma_send;
- uint64_t recv_size;
- int msg_id = 1, ret;
+ struct vfio_user_dma_region_access *dma_reply;
+ struct vfio_user_dma_region_access *dma_req;
+ struct vfio_user_dma_region_access dma;
+ static int msg_id = 1;
+ size_t remaining;
+ size_t count;
+ size_t rlen;
+ void *rbuf;
assert(vfu_ctx != NULL);
assert(sg != NULL);
- recv_size = sizeof(*dma_recv) + sg->length;
+ rlen = sizeof(struct vfio_user_dma_region_access) +
+ MIN(sg->length, vfu_ctx->client_max_data_xfer_size);
+
+ rbuf = calloc(1, rlen);
- dma_recv = calloc(recv_size, 1);
- if (dma_recv == NULL) {
+ if (rbuf == NULL) {
return -1;
}
- dma_send.addr = (uint64_t)sg->dma_addr;
- dma_send.count = sg->length;
- ret = vfu_ctx->tran->send_msg(vfu_ctx, msg_id, VFIO_USER_DMA_READ,
- &dma_send, sizeof(dma_send), NULL,
- dma_recv, recv_size);
+ remaining = sg->length;
+ count = 0;
- if (ret < 0) {
- if (errno == ENOMSG) {
- vfu_reset_ctx(vfu_ctx, "closed");
- errno = ENOTCONN;
- } else if (errno == ECONNRESET) {
- vfu_reset_ctx(vfu_ctx, "reset");
- errno = ENOTCONN;
- }
+ if (cmd == VFIO_USER_DMA_READ) {
+ dma_req = &dma;
+ dma_reply = rbuf;
} else {
- /* FIXME no need for memcpy */
- memcpy(data, dma_recv->data, sg->length);
+ dma_req = rbuf;
+ dma_reply = &dma;
}
- free(dma_recv);
+ while (remaining > 0) {
+ int ret;
- return ret;
-}
+ dma_req->addr = (uint64_t)sg->dma_addr + count;
+ dma_req->count = MIN(remaining, vfu_ctx->client_max_data_xfer_size);
-EXPORT int
-vfu_dma_write(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data)
-{
- struct vfio_user_dma_region_access *dma_send, dma_recv;
- uint64_t send_size = sizeof(*dma_send) + sg->length;
- int msg_id = 1, ret;
+ if (cmd == VFIO_USER_DMA_WRITE) {
+ memcpy(rbuf + sizeof(*dma_req), data + count, dma_req->count);
- assert(vfu_ctx != NULL);
- assert(sg != NULL);
+ ret = vfu_ctx->tran->send_msg(vfu_ctx, msg_id++, VFIO_USER_DMA_WRITE,
+ rbuf, rlen, NULL,
+ dma_reply, sizeof(*dma_reply));
+ } else {
+ ret = vfu_ctx->tran->send_msg(vfu_ctx, msg_id++, VFIO_USER_DMA_READ,
+ dma_req, sizeof(*dma_req), NULL,
+ rbuf, rlen);
+ }
- dma_send = calloc(send_size, 1);
- if (dma_send == NULL) {
- return -1;
- }
- dma_send->addr = (uint64_t)sg->dma_addr;
- dma_send->count = sg->length;
- memcpy(dma_send->data, data, sg->length); /* FIXME no need to copy! */
- ret = vfu_ctx->tran->send_msg(vfu_ctx, msg_id, VFIO_USER_DMA_WRITE,
- dma_send, send_size, NULL,
- &dma_recv, sizeof(dma_recv));
+ if (ret < 0) {
+ ret = errno;
+ if (ret == ENOMSG) {
+ vfu_reset_ctx(vfu_ctx, "closed");
+ ret = ENOTCONN;
+ } else if (errno == ECONNRESET) {
+ vfu_reset_ctx(vfu_ctx, "reset");
+ ret = ENOTCONN;
+ }
+ free(rbuf);
+ return ERROR_INT(ret);
+ }
- if (ret < 0) {
- if (errno == ENOMSG) {
- vfu_reset_ctx(vfu_ctx, "closed");
- errno = ENOTCONN;
- } else if (errno == ECONNRESET) {
- vfu_reset_ctx(vfu_ctx, "reset");
- errno = ENOTCONN;
+ if (dma_reply->addr != dma_req->addr ||
+ dma_reply->count != dma_req->count) {
+ vfu_log(vfu_ctx, LOG_ERR, "bad reply to DMA transfer: "
+ "request:%#lx,%lu reply:%#lx,%lu",
+ dma_req->addr, dma_req->count,
+ dma_reply->addr, dma_reply->count);
+ free(rbuf);
+ return ERROR_INT(EINVAL);
}
+
+ if (cmd == VFIO_USER_DMA_READ) {
+ memcpy(data + count, rbuf + sizeof(*dma_reply), dma_req->count);
+ }
+
+ count += dma_req->count;
+ remaining -= dma_req->count;
}
- free(dma_send);
+ free(rbuf);
+ return 0;
+}
- return ret;
+EXPORT int
+vfu_dma_read(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data)
+{
+ return vfu_dma_transfer(vfu_ctx, VFIO_USER_DMA_READ, sg, data);
+}
+
+EXPORT int
+vfu_dma_write(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data)
+{
+ return vfu_dma_transfer(vfu_ctx, VFIO_USER_DMA_WRITE, sg, data);
}
/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */
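The chunking is transparent to server code: vfu_dma_read() and
vfu_dma_write() keep their existing signatures and simply loop over
client_max_data_xfer_size-sized pieces internally, as shown in
vfu_dma_transfer() above. A rough usage sketch follows; the
vfu_addr_to_sg() call and its exact signature here are assumptions about
the surrounding API rather than anything this patch changes.

    #include <sys/mman.h>        /* PROT_READ */
    #include "libvfio-user.h"

    /*
     * Hypothetical caller: read `len` bytes of guest memory at `iova` into
     * `buf`. vfu_dma_read() now issues as many VFIO_USER_DMA_READ messages
     * as needed, each at most client_max_data_xfer_size bytes.
     */
    static int
    read_guest(vfu_ctx_t *vfu_ctx, vfu_dma_addr_t iova, void *buf, size_t len)
    {
        dma_sg_t sg;

        /* translate the IOVA to one scatter-gather entry (assumed API) */
        if (vfu_addr_to_sg(vfu_ctx, iova, len, &sg, 1, PROT_READ) != 1) {
            return -1;
        }

        return vfu_dma_read(vfu_ctx, &sg, buf);
    }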
diff --git a/lib/private.h b/lib/private.h
index c7c0627..f474e1f 100644
--- a/lib/private.h
+++ b/lib/private.h
@@ -37,6 +37,7 @@
#include "common.h"
#include "pci_caps.h"
+#include "vfio-user.h"
/*
* The main reason we limit the size of an individual DMA region from the client
@@ -46,7 +47,14 @@
#define MAX_DMA_SIZE (8 * ONE_TB)
#define MAX_DMA_REGIONS 16
-#define SERVER_MAX_MSG_SIZE 65536
+#define SERVER_MAX_DATA_XFER_SIZE (VFIO_USER_DEFAULT_MAX_DATA_XFER_SIZE)
+
+/*
+ * Enough to receive a VFIO_USER_REGION_WRITE of SERVER_MAX_DATA_XFER_SIZE.
+ */
+#define SERVER_MAX_MSG_SIZE (SERVER_MAX_DATA_XFER_SIZE + \
+ sizeof(struct vfio_user_header) + \
+ sizeof(struct vfio_user_region_access))
/*
* Structure used to hold an in-flight request+reply.
@@ -155,6 +163,7 @@ struct vfu_ctx {
vfu_dma_unregister_cb_t *dma_unregister;
int client_max_fds;
+ size_t client_max_data_xfer_size;
struct migration *migration;
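For a sense of scale: with the default limit of 1 MiB (1048576 bytes, as
also shown in the capability JSON sample in lib/tran_sock.c below) and
assuming the usual 16-byte vfio_user_header and 16-byte
vfio_user_region_access wire structures, SERVER_MAX_MSG_SIZE now works out
to 1048576 + 16 + 16 = 1048608 bytes, just enough to receive a maximal
VFIO_USER_REGION_WRITE; the previous flat cap was 65536 bytes for the
entire message.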
diff --git a/lib/tran_sock.c b/lib/tran_sock.c
index 12b3321..f8b7004 100644
--- a/lib/tran_sock.c
+++ b/lib/tran_sock.c
@@ -63,7 +63,7 @@ tran_sock_send_iovec(int sock, uint16_t msg_id, bool is_reply,
int *fds, int count, int err)
{
int ret;
- struct vfio_user_header hdr = {.msg_id = msg_id};
+ struct vfio_user_header hdr = { .msg_id = msg_id };
struct msghdr msg;
size_t i;
size_t size = count * sizeof(*fds);
@@ -250,6 +250,10 @@ tran_sock_recv_fds(int sock, struct vfio_user_header *hdr, bool is_reply,
}
}
+ if (hdr->msg_size < sizeof(*hdr) || hdr->msg_size > SERVER_MAX_MSG_SIZE) {
+ return ERROR_INT(EINVAL);
+ }
+
if (len != NULL && *len > 0 && hdr->msg_size > sizeof(*hdr)) {
ret = recv(sock, data, MIN(hdr->msg_size - sizeof(*hdr), *len),
MSG_WAITALL);
@@ -276,8 +280,6 @@ tran_sock_recv(int sock, struct vfio_user_header *hdr, bool is_reply,
/*
* Like tran_sock_recv(), but will automatically allocate reply data.
- *
- * FIXME: this does an unconstrained alloc of client-supplied data.
*/
int
tran_sock_recv_alloc(int sock, struct vfio_user_header *hdr, bool is_reply,
@@ -294,6 +296,7 @@ tran_sock_recv_alloc(int sock, struct vfio_user_header *hdr, bool is_reply,
}
assert(hdr->msg_size >= sizeof(*hdr));
+ assert(hdr->msg_size <= SERVER_MAX_MSG_SIZE);
len = hdr->msg_size - sizeof(*hdr);
@@ -464,6 +467,7 @@ tran_sock_get_poll_fd(vfu_ctx_t *vfu_ctx)
* {
* "capabilities": {
* "max_msg_fds": 32,
+ * "max_data_xfer_size": 1048576
* "migration": {
* "pgsize": 4096
* }
@@ -474,8 +478,8 @@ tran_sock_get_poll_fd(vfu_ctx_t *vfu_ctx)
* available in newer library versions, so we don't use it.
*/
int
-tran_parse_version_json(const char *json_str,
- int *client_max_fdsp, size_t *pgsizep)
+tran_parse_version_json(const char *json_str, int *client_max_fdsp,
+ size_t *client_max_data_xfer_sizep, size_t *pgsizep)
{
struct json_object *jo_caps = NULL;
struct json_object *jo_top = NULL;
@@ -508,6 +512,18 @@ tran_parse_version_json(const char *json_str,
}
}
+ if (json_object_object_get_ex(jo_caps, "max_data_xfer_size", &jo)) {
+ if (json_object_get_type(jo) != json_type_int) {
+ goto out;
+ }
+
+ errno = 0;
+ *client_max_data_xfer_sizep = (int)json_object_get_int64(jo);
+
+ if (errno != 0) {
+ goto out;
+ }
+ }
if (json_object_object_get_ex(jo_caps, "migration", &jo)) {
struct json_object *jo2 = NULL;
@@ -581,6 +597,7 @@ recv_version(vfu_ctx_t *vfu_ctx, int sock, uint16_t *msg_idp,
}
vfu_ctx->client_max_fds = 1;
+ vfu_ctx->client_max_data_xfer_size = VFIO_USER_DEFAULT_MAX_DATA_XFER_SIZE;
if (vlen > sizeof(*cversion)) {
const char *json_str = (const char *)cversion->data;
@@ -594,6 +611,7 @@ recv_version(vfu_ctx_t *vfu_ctx, int sock, uint16_t *msg_idp,
}
ret = tran_parse_version_json(json_str, &vfu_ctx->client_max_fds,
+ &vfu_ctx->client_max_data_xfer_size,
&pgsize);
if (ret < 0) {
@@ -656,20 +674,20 @@ send_version(vfu_ctx_t *vfu_ctx, int sock, uint16_t msg_id,
"{"
"\"capabilities\":{"
"\"max_msg_fds\":%u,"
- "\"max_msg_size\":%u"
+ "\"max_data_xfer_size\":%u"
"}"
- "}", SERVER_MAX_FDS, SERVER_MAX_MSG_SIZE);
+ "}", SERVER_MAX_FDS, SERVER_MAX_DATA_XFER_SIZE);
} else {
slen = snprintf(server_caps, sizeof(server_caps),
"{"
"\"capabilities\":{"
"\"max_msg_fds\":%u,"
- "\"max_msg_size\":%u,"
+ "\"max_data_xfer_size\":%u,"
"\"migration\":{"
"\"pgsize\":%zu"
"}"
"}"
- "}", SERVER_MAX_FDS, SERVER_MAX_MSG_SIZE,
+ "}", SERVER_MAX_FDS, SERVER_MAX_DATA_XFER_SIZE,
migration_get_pgsize(vfu_ctx->migration));
}
@@ -787,6 +805,8 @@ tran_sock_recv_body(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg)
ts = vfu_ctx->tran_data;
+ assert(msg->in_size <= SERVER_MAX_MSG_SIZE);
+
msg->in_data = malloc(msg->in_size);
if (msg->in_data == NULL) {
diff --git a/lib/tran_sock.h b/lib/tran_sock.h
index dbf54b1..efc5d60 100644
--- a/lib/tran_sock.h
+++ b/lib/tran_sock.h
@@ -54,7 +54,7 @@ extern struct transport_ops tran_sock_ops;
*/
int
tran_parse_version_json(const char *json_str, int *client_max_fdsp,
- size_t *pgsizep);
+ size_t *client_max_data_xfer_sizep, size_t *pgsizep);
/*
* Send a message to the other end. The iovecs array should leave the first