path: root/lib/libvfio-user.c
author     John Levon <john.levon@nutanix.com>    2021-06-02 16:08:14 +0100
committer  GitHub <noreply@github.com>            2021-06-02 16:08:14 +0100
commit     57684de8240fce4a277301a86a803842338762af (patch)
tree       ea1066e2ae4de34bd7b77f9fb7a26af40848b52f /lib/libvfio-user.c
parent     b8234a75d9ec2c95cb889c0cef27927f34ad9cbc (diff)
replace max_msg_size with max_data_xfer_size (#541)
The previously specified max_msg_size had one major issue: it implied a
(way too small) limit on the size of dirty bitmaps that could be requested
by a client, and as a result a hard limit on memory region size. It seemed
awkward to attempt to split up an unmap request instead.

Instead, let most requests and replies be limited by their "natural"
limits; for example, the number of booleans in VFIO_USER_SET_IRQS is
limited by the MSI-X count.

For the requests that solicit or provide data - that is,
VFIO_USER_DMA_READ/WRITE and VFIO_USER_REGION_READ/WRITE - we negotiate a
new max_data_xfer_size value. These are much easier to split up into
separate requests on the client side, so they should not present an
implementation problem. For our server, chunking is implemented in
vfu_dma_read() and vfu_dma_write().

Signed-off-by: John Levon <john.levon@nutanix.com>
Reviewed-by: Swapnil Ingle <swapnil.ingle@nutanix.com>
Reviewed-by: Thanos Makatos <thanos.makatos@nutanix.com>
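As a rough illustration of the client-side splitting described above, a
client could chunk a large region read as in the following sketch. This is
not part of the patch: send_region_read() is a hypothetical helper, and
max_data_xfer_size is assumed to hold the negotiated value.

/*
 * Sketch only: split one logical read into VFIO_USER_REGION_READ requests
 * no larger than the negotiated max_data_xfer_size. send_region_read() is
 * a hypothetical client helper, not a libvfio-user API.
 */
static int
read_region_chunked(int sock, uint32_t index, uint64_t offset,
                    void *buf, size_t len, size_t max_data_xfer_size)
{
    size_t done = 0;

    while (done < len) {
        size_t count = MIN(len - done, max_data_xfer_size);

        /* one request per chunk; errno is set by the helper on failure */
        if (send_region_read(sock, index, offset + done,
                             (char *)buf + done, count) < 0) {
            return -1;
        }
        done += count;
    }
    return 0;
}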
Diffstat (limited to 'lib/libvfio-user.c')
-rw-r--r--   lib/libvfio-user.c   145
1 file changed, 89 insertions(+), 56 deletions(-)
diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c
index 452b15f..62f906a 100644
--- a/lib/libvfio-user.c
+++ b/lib/libvfio-user.c
@@ -252,6 +252,12 @@ is_valid_region_access(vfu_ctx_t *vfu_ctx, size_t size, uint16_t cmd,
         return false;
     }
 
+    if (ra->count > SERVER_MAX_DATA_XFER_SIZE) {
+        vfu_log(vfu_ctx, LOG_ERR, "region access count too large (%u)",
+                ra->count);
+        return false;
+    }
+
     if (cmd == VFIO_USER_REGION_WRITE && size - sizeof(*ra) != ra->count) {
         vfu_log(vfu_ctx, LOG_ERR, "region write count too small: "
                 "expected %lu, got %u", size - sizeof(*ra), ra->count);
@@ -265,7 +271,6 @@ is_valid_region_access(vfu_ctx_t *vfu_ctx, size_t size, uint16_t cmd,
         return false;
     }
 
-    // FIXME: ->count unbounded (alloc), except for region size
     // FIXME: need to audit later for wraparound
     if (ra->offset + ra->count > vfu_ctx->reg_info[index].size) {
         vfu_log(vfu_ctx, LOG_ERR, "out of bounds region access %#lx-%#lx "
@@ -891,7 +896,7 @@ is_valid_header(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg)
     }
 
     if (msg->hdr.msg_size < sizeof(msg->hdr)) {
-        vfu_log(vfu_ctx, LOG_ERR, "msg%#hx: bad size %d in header",
+        vfu_log(vfu_ctx, LOG_ERR, "msg%#hx: bad size %u in header",
                 msg->hdr.msg_id, msg->hdr.msg_size);
         return false;
     } else if (msg->hdr.msg_size == sizeof(msg->hdr) &&
@@ -900,6 +905,11 @@ is_valid_header(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg)
                 msg->hdr.msg_id, msg->hdr.cmd);
         return false;
    } else if (msg->hdr.msg_size > SERVER_MAX_MSG_SIZE) {
+        /*
+         * We know we can reject this: all normal requests shouldn't need this
+         * amount of space, including VFIO_USER_REGION_WRITE, which should be
+         * bound by max_data_xfer_size.
+         */
         vfu_log(vfu_ctx, LOG_ERR, "msg%#hx: size of %u is too large",
                 msg->hdr.msg_id, msg->hdr.msg_size);
         return false;
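To make the new comment's reasoning concrete: the largest well-formed
request is a VFIO_USER_REGION_WRITE whose payload is capped by
max_data_xfer_size. As a sketch, assuming the header and region-access
struct names used elsewhere in the protocol, the rejection bound amounts
to:

/* Sketch: any msg_size above header + region-access struct +
 * SERVER_MAX_DATA_XFER_SIZE cannot belong to a legitimate request,
 * so it is safe to reject outright. Struct names are assumptions. */
size_t max_valid_size = sizeof(struct vfio_user_header) +
                        sizeof(struct vfio_user_region_access) +
                        SERVER_MAX_DATA_XFER_SIZE;
assert(max_valid_size <= SERVER_MAX_MSG_SIZE);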
@@ -1603,82 +1613,105 @@ vfu_unmap_sg(vfu_ctx_t *vfu_ctx, const dma_sg_t *sg, struct iovec *iov, int cnt)
     return dma_unmap_sg(vfu_ctx->dma, sg, iov, cnt);
 }
 
-EXPORT int
-vfu_dma_read(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data)
+static int
+vfu_dma_transfer(vfu_ctx_t *vfu_ctx, enum vfio_user_command cmd,
+                 dma_sg_t *sg, void *data)
 {
-    struct vfio_user_dma_region_access *dma_recv;
-    struct vfio_user_dma_region_access dma_send;
-    uint64_t recv_size;
-    int msg_id = 1, ret;
+    struct vfio_user_dma_region_access *dma_reply;
+    struct vfio_user_dma_region_access *dma_req;
+    struct vfio_user_dma_region_access dma;
+    static int msg_id = 1;
+    size_t remaining;
+    size_t count;
+    size_t rlen;
+    void *rbuf;
 
     assert(vfu_ctx != NULL);
     assert(sg != NULL);
 
-    recv_size = sizeof(*dma_recv) + sg->length;
+    rlen = sizeof(struct vfio_user_dma_region_access) +
+           MIN(sg->length, vfu_ctx->client_max_data_xfer_size);
+
+    rbuf = calloc(1, rlen);
 
-    dma_recv = calloc(recv_size, 1);
-    if (dma_recv == NULL) {
+    if (rbuf == NULL) {
         return -1;
     }
 
-    dma_send.addr = (uint64_t)sg->dma_addr;
-    dma_send.count = sg->length;
-    ret = vfu_ctx->tran->send_msg(vfu_ctx, msg_id, VFIO_USER_DMA_READ,
-                                  &dma_send, sizeof(dma_send), NULL,
-                                  dma_recv, recv_size);
+    remaining = sg->length;
+    count = 0;
 
-    if (ret < 0) {
-        if (errno == ENOMSG) {
-            vfu_reset_ctx(vfu_ctx, "closed");
-            errno = ENOTCONN;
-        } else if (errno == ECONNRESET) {
-            vfu_reset_ctx(vfu_ctx, "reset");
-            errno = ENOTCONN;
-        }
+    if (cmd == VFIO_USER_DMA_READ) {
+        dma_req = &dma;
+        dma_reply = rbuf;
     } else {
-        /* FIXME no need for memcpy */
-        memcpy(data, dma_recv->data, sg->length);
+        dma_req = rbuf;
+        dma_reply = &dma;
     }
 
-    free(dma_recv);
+    while (remaining > 0) {
+        int ret;
 
-    return ret;
-}
+        dma_req->addr = (uint64_t)sg->dma_addr + count;
+        dma_req->count = MIN(remaining, vfu_ctx->client_max_data_xfer_size);
 
-EXPORT int
-vfu_dma_write(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data)
-{
-    struct vfio_user_dma_region_access *dma_send, dma_recv;
-    uint64_t send_size = sizeof(*dma_send) + sg->length;
-    int msg_id = 1, ret;
+        if (cmd == VFIO_USER_DMA_WRITE) {
+            memcpy(rbuf + sizeof(*dma_req), data + count, dma_req->count);
 
-    assert(vfu_ctx != NULL);
-    assert(sg != NULL);
+            ret = vfu_ctx->tran->send_msg(vfu_ctx, msg_id++, VFIO_USER_DMA_WRITE,
+                                          rbuf, rlen, NULL,
+                                          dma_reply, sizeof(*dma_reply));
+        } else {
+            ret = vfu_ctx->tran->send_msg(vfu_ctx, msg_id++, VFIO_USER_DMA_READ,
+                                          dma_req, sizeof(*dma_req), NULL,
+                                          rbuf, rlen);
+        }
 
-    dma_send = calloc(send_size, 1);
-    if (dma_send == NULL) {
-        return -1;
-    }
-    dma_send->addr = (uint64_t)sg->dma_addr;
-    dma_send->count = sg->length;
-    memcpy(dma_send->data, data, sg->length); /* FIXME no need to copy! */
-    ret = vfu_ctx->tran->send_msg(vfu_ctx, msg_id, VFIO_USER_DMA_WRITE,
-                                  dma_send, send_size, NULL,
-                                  &dma_recv, sizeof(dma_recv));
+        if (ret < 0) {
+            ret = errno;
+            if (ret == ENOMSG) {
+                vfu_reset_ctx(vfu_ctx, "closed");
+                ret = ENOTCONN;
+            } else if (errno == ECONNRESET) {
+                vfu_reset_ctx(vfu_ctx, "reset");
+                ret = ENOTCONN;
+            }
+            free(rbuf);
+            return ERROR_INT(ret);
+        }
 
-    if (ret < 0) {
-        if (errno == ENOMSG) {
-            vfu_reset_ctx(vfu_ctx, "closed");
-            errno = ENOTCONN;
-        } else if (errno == ECONNRESET) {
-            vfu_reset_ctx(vfu_ctx, "reset");
-            errno = ENOTCONN;
+        if (dma_reply->addr != dma_req->addr ||
+            dma_reply->count != dma_req->count) {
+            vfu_log(vfu_ctx, LOG_ERR, "bad reply to DMA transfer: "
+                    "request:%#lx,%lu reply:%#lx,%lu",
+                    dma_req->addr, dma_req->count,
+                    dma_reply->addr, dma_reply->count);
+            free(rbuf);
+            return ERROR_INT(EINVAL);
         }
+
+        if (cmd == VFIO_USER_DMA_READ) {
+            memcpy(data + count, rbuf + sizeof(*dma_reply), dma_req->count);
+        }
+
+        count += dma_req->count;
+        remaining -= dma_req->count;
     }
 
-    free(dma_send);
+    free(rbuf);
+    return 0;
+}
 
-    return ret;
+EXPORT int
+vfu_dma_read(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data)
+{
+    return vfu_dma_transfer(vfu_ctx, VFIO_USER_DMA_READ, sg, data);
+}
+
+EXPORT int
+vfu_dma_write(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data)
+{
+    return vfu_dma_transfer(vfu_ctx, VFIO_USER_DMA_WRITE, sg, data);
 }
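Callers of the exported functions are unchanged by this refactor; a
minimal usage sketch follows. The dma_sg_t setup is assumed to have been
done elsewhere, and the buffer size is illustrative only.

/* Usage sketch: one vfu_dma_read() call now transparently issues as many
 * VFIO_USER_DMA_READ requests as the client's max_data_xfer_size
 * requires. Assumes <err.h> and a dma_sg_t already populated for a 1 MB
 * guest range (e.g. via the library's address-to-sg lookup). */
uint8_t buf[1024 * 1024];
dma_sg_t sg;        /* assumed: already filled in */

if (vfu_dma_read(vfu_ctx, &sg, buf) < 0) {
    err(EXIT_FAILURE, "vfu_dma_read failed");
}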
/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */