aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs/vfio-user.rst167
-rw-r--r--include/vfio-user.h2
-rw-r--r--lib/libvfio-user.c145
-rw-r--r--lib/private.h11
-rw-r--r--lib/tran_sock.c38
-rw-r--r--lib/tran_sock.h2
-rw-r--r--samples/client.c110
-rw-r--r--test/py/libvfio_user.py7
-rw-r--r--test/py/test_negotiate.py17
9 files changed, 304 insertions, 195 deletions
diff --git a/docs/vfio-user.rst b/docs/vfio-user.rst
index e801859..a5d2b43 100644
--- a/docs/vfio-user.rst
+++ b/docs/vfio-user.rst
@@ -317,9 +317,8 @@ Message sizes
Some requests have an ``argsz`` field. In a request, it defines the maximum
expected reply payload size, which should be at least the size of the fixed
-reply payload headers defined here. For messages that don't have a reply
-payload, it defines the size of the incoming request (not including the standard
-header); otherwise it's not related to the request message size.
+reply payload headers defined here. The *request* payload size is defined by the
+usual ``msg_size`` field in the header, not the ``argsz`` field.
In a reply, the server sets ``argsz`` field to the size needed for a full
payload size. This may be less than the requested maximum size. This may be
@@ -328,6 +327,11 @@ included in the reply, but the ``argsz`` field in the reply indicates the needed
size, allowing a client to allocate a larger buffer for holding the reply before
trying again.
+In addition, during negotiation (see `Version`_), the client and server may
+each specify a ``max_data_xfer_size`` value; this defines the maximum data that
+may be read or written via one of the ``VFIO_USER_DMA/REGION_READ/WRITE``
+messages; see `Read and Write Operations`_.
+
Protocol Specification
======================
@@ -475,43 +479,39 @@ version data 4 variable (including terminating NUL). Optional.
The version data is an optional UTF-8 encoded JSON byte array with the following
format:
-+--------------------+------------------+-----------------------------------+
-| Name | Type | Description |
-+====================+==================+===================================+
-| ``"capabilities"`` | collection of | Contains common capabilities that |
-| | name/value pairs | the sender supports. Optional. |
-+--------------------+------------------+-----------------------------------+
++--------------+--------+-----------------------------------+
+| Name | Type | Description |
++==============+========+===================================+
+| capabilities | object | Contains common capabilities that |
+| | | the sender supports. Optional. |
++--------------+--------+-----------------------------------+
Capabilities:
-+--------------------+------------------+-------------------------------------+
-| Name | Type | Description |
-+====================+==================+=====================================+
-| ``"max_msg_fds"`` | number | Maximum number of file descriptors |
-| | | that can be received by the sender |
-| | | in one message. Optional. If not |
-| | | specified then the receiver must |
-| | | assume ``"max_msg_fds"=1``. |
-+--------------------+------------------+-------------------------------------+
-| ``"max_msg_size"`` | number | Maximum message size in bytes that |
-| | | the receiver can handle, including |
-| | | the header. Optional. If not |
-| | | specified then the receiver must |
-| | | assume ``"max_msg_size"=4096``. |
-+--------------------+------------------+-------------------------------------+
-| ``"migration"`` | collection of | Migration capability parameters. If |
-| | name/value pairs | missing then migration is not |
-| | | supported by the sender. |
-+--------------------+------------------+-------------------------------------+
++--------------------+--------+------------------------------------------------+
+| Name | Type | Description |
++====================+========+================================================+
+| max_msg_fds | number | Maximum number of file descriptors that can be |
+| | | received by the sender in one message. |
+| | | Optional. If not specified then the receiver |
+| | | must assume a value of ``1``. |
++--------------------+--------+------------------------------------------------+
+| max_data_xfer_size | number | Maximum ``count`` for data transfer messages; |
+| | | see `Read and Write Operations`_. Optional, |
+| | | with a default value of 1048576 bytes. |
++--------------------+--------+------------------------------------------------+
+| migration | object | Migration capability parameters. If missing |
+| | | then migration is not supported by the sender. |
++--------------------+--------+------------------------------------------------+
The migration capability contains the following name/value pairs:
-+--------------+--------+-----------------------------------------------+
-| Name | Type | Description |
-+==============+========+===============================================+
-| ``"pgsize"`` | number | Page size of dirty pages bitmap. The smallest |
-| | | between the client and the server is used. |
-+--------------+--------+-----------------------------------------------+
++--------+--------+-----------------------------------------------+
+| Name | Type | Description |
++========+========+===============================================+
+| pgsize | number | Page size of dirty pages bitmap. The smallest |
+| | | between the client and the server is used. |
++--------+--------+-----------------------------------------------+
Reply
^^^^^
@@ -1302,6 +1302,9 @@ There is no payload in the reply.
Note that all of these operations must be supported by the client and/or server,
even if the corresponding memory or device region has been shared as mappable.
+The ``count`` field must not exceed the value of ``max_data_xfer_size`` of the
+peer, for both reads and writes.
+
``VFIO_USER_REGION_READ``
-------------------------
@@ -1315,16 +1318,16 @@ Request
+--------+--------+----------+
| Name | Offset | Size |
+========+========+==========+
-| Offset | 0 | 8 |
+| offset | 0 | 8 |
+--------+--------+----------+
-| Region | 8 | 4 |
+| region | 8 | 4 |
+--------+--------+----------+
-| Count | 12 | 4 |
+| count | 12 | 4 |
+--------+--------+----------+
-* *Offset* into the region being accessed.
-* *Region* is the index of the region being accessed.
-* *Count* is the size of the data to be transferred.
+* *offset* into the region being accessed.
+* *region* is the index of the region being accessed.
+* *count* is the size of the data to be transferred.
Reply
^^^^^
@@ -1332,19 +1335,19 @@ Reply
+--------+--------+----------+
| Name | Offset | Size |
+========+========+==========+
-| Offset | 0 | 8 |
+| offset | 0 | 8 |
+--------+--------+----------+
-| Region | 8 | 4 |
+| region | 8 | 4 |
+--------+--------+----------+
-| Count | 12 | 4 |
+| count | 12 | 4 |
+--------+--------+----------+
-| Data | 16 | variable |
+| data | 16 | variable |
+--------+--------+----------+
-* *Offset* into the region accessed.
-* *Region* is the index of the region accessed.
-* *Count* is the size of the data transferred.
-* *Data* is the data that was read from the device region.
+* *offset* into the region accessed.
+* *region* is the index of the region accessed.
+* *count* is the size of the data transferred.
+* *data* is the data that was read from the device region.
``VFIO_USER_REGION_WRITE``
--------------------------
@@ -1359,19 +1362,19 @@ Request
+--------+--------+----------+
| Name | Offset | Size |
+========+========+==========+
-| Offset | 0 | 8 |
+| offset | 0 | 8 |
+--------+--------+----------+
-| Region | 8 | 4 |
+| region | 8 | 4 |
+--------+--------+----------+
-| Count | 12 | 4 |
+| count | 12 | 4 |
+--------+--------+----------+
-| Data | 16 | variable |
+| data | 16 | variable |
+--------+--------+----------+
-* *Offset* into the region being accessed.
-* *Region* is the index of the region being accessed.
-* *Count* is the size of the data to be transferred.
-* *Data* is the data to write
+* *offset* into the region being accessed.
+* *region* is the index of the region being accessed.
+* *count* is the size of the data to be transferred.
+* *data* is the data to write
Reply
^^^^^
@@ -1379,16 +1382,16 @@ Reply
+--------+--------+----------+
| Name | Offset | Size |
+========+========+==========+
-| Offset | 0 | 8 |
+| offset | 0 | 8 |
+--------+--------+----------+
-| Region | 8 | 4 |
+| region | 8 | 4 |
+--------+--------+----------+
-| Count | 12 | 4 |
+| count | 12 | 4 |
+--------+--------+----------+
-* *Offset* into the region accessed.
-* *Region* is the index of the region accessed.
-* *Count* is the size of the data transferred.
+* *offset* into the region accessed.
+* *region* is the index of the region accessed.
+* *count* is the size of the data transferred.
``VFIO_USER_DMA_READ``
-----------------------
@@ -1402,14 +1405,14 @@ Request
+---------+--------+----------+
| Name | Offset | Size |
+=========+========+==========+
-| Address | 0 | 8 |
+| address | 0 | 8 |
+---------+--------+----------+
-| Count | 8 | 8 |
+| count | 8 | 8 |
+---------+--------+----------+
-* *Address* is the client DMA memory address being accessed. This address must have
+* *address* is the client DMA memory address being accessed. This address must have
been previously exported to the server with a ``VFIO_USER_DMA_MAP`` message.
-* *Count* is the size of the data to be transferred.
+* *count* is the size of the data to be transferred.
Reply
^^^^^
@@ -1417,16 +1420,16 @@ Reply
+---------+--------+----------+
| Name | Offset | Size |
+=========+========+==========+
-| Address | 0 | 8 |
+| address | 0 | 8 |
+---------+--------+----------+
-| Count | 8 | 8 |
+| count | 8 | 8 |
+---------+--------+----------+
-| Data | 16 | variable |
+| data | 16 | variable |
+---------+--------+----------+
-* *Address* is the client DMA memory address being accessed.
-* *Count* is the size of the data transferred.
-* *Data* is the data read.
+* *address* is the client DMA memory address being accessed.
+* *count* is the size of the data transferred.
+* *data* is the data read.
``VFIO_USER_DMA_WRITE``
-----------------------
@@ -1440,17 +1443,17 @@ Request
+---------+--------+----------+
| Name | Offset | Size |
+=========+========+==========+
-| Address | 0 | 8 |
+| address | 0 | 8 |
+---------+--------+----------+
-| Count | 8 | 8 |
+| count | 8 | 8 |
+---------+--------+----------+
-| Data | 16 | variable |
+| data | 16 | variable |
+---------+--------+----------+
-* *Address* is the client DMA memory address being accessed. This address must have
+* *address* is the client DMA memory address being accessed. This address must have
been previously exported to the server with a ``VFIO_USER_DMA_MAP`` message.
-* *Count* is the size of the data to be transferred.
-* *Data* is the data to write
+* *count* is the size of the data to be transferred.
+* *data* is the data to write
Reply
^^^^^
@@ -1458,13 +1461,13 @@ Reply
+---------+--------+----------+
| Name | Offset | Size |
+=========+========+==========+
-| Address | 0 | 8 |
+| address | 0 | 8 |
+---------+--------+----------+
-| Count | 8 | 4 |
+| count | 8 | 4 |
+---------+--------+----------+
-* *Address* is the client DMA memory address being accessed.
-* *Count* is the size of the data transferred.
+* *address* is the client DMA memory address being accessed.
+* *count* is the size of the data transferred.
``VFIO_USER_DEVICE_RESET``
--------------------------
diff --git a/include/vfio-user.h b/include/vfio-user.h
index 7be7cf7..43f6877 100644
--- a/include/vfio-user.h
+++ b/include/vfio-user.h
@@ -50,6 +50,8 @@
extern "C" {
#endif
+#define VFIO_USER_DEFAULT_MAX_DATA_XFER_SIZE (1024 * 1024)
+
enum vfio_user_command {
VFIO_USER_VERSION = 1,
VFIO_USER_DMA_MAP = 2,
diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c
index 452b15f..62f906a 100644
--- a/lib/libvfio-user.c
+++ b/lib/libvfio-user.c
@@ -252,6 +252,12 @@ is_valid_region_access(vfu_ctx_t *vfu_ctx, size_t size, uint16_t cmd,
return false;
}
+ if (ra->count > SERVER_MAX_DATA_XFER_SIZE) {
+ vfu_log(vfu_ctx, LOG_ERR, "region access count too large (%u)",
+ ra->count);
+ return false;
+ }
+
if (cmd == VFIO_USER_REGION_WRITE && size - sizeof(*ra) != ra->count) {
vfu_log(vfu_ctx, LOG_ERR, "region write count too small: "
"expected %lu, got %u", size - sizeof(*ra), ra->count);
@@ -265,7 +271,6 @@ is_valid_region_access(vfu_ctx_t *vfu_ctx, size_t size, uint16_t cmd,
return false;
}
- // FIXME: ->count unbounded (alloc), except for region size
// FIXME: need to audit later for wraparound
if (ra->offset + ra->count > vfu_ctx->reg_info[index].size) {
vfu_log(vfu_ctx, LOG_ERR, "out of bounds region access %#lx-%#lx "
@@ -891,7 +896,7 @@ is_valid_header(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg)
}
if (msg->hdr.msg_size < sizeof(msg->hdr)) {
- vfu_log(vfu_ctx, LOG_ERR, "msg%#hx: bad size %d in header",
+ vfu_log(vfu_ctx, LOG_ERR, "msg%#hx: bad size %u in header",
msg->hdr.msg_id, msg->hdr.msg_size);
return false;
} else if (msg->hdr.msg_size == sizeof(msg->hdr) &&
@@ -900,6 +905,11 @@ is_valid_header(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg)
msg->hdr.msg_id, msg->hdr.cmd);
return false;
} else if (msg->hdr.msg_size > SERVER_MAX_MSG_SIZE) {
+ /*
+ * We know we can reject this: all normal requests shouldn't need this
+ * amount of space, including VFIO_USER_REGION_WRITE, which should be
+ * bound by max_data_xfer_size.
+ */
vfu_log(vfu_ctx, LOG_ERR, "msg%#hx: size of %u is too large",
msg->hdr.msg_id, msg->hdr.msg_size);
return false;
@@ -1603,82 +1613,105 @@ vfu_unmap_sg(vfu_ctx_t *vfu_ctx, const dma_sg_t *sg, struct iovec *iov, int cnt)
return dma_unmap_sg(vfu_ctx->dma, sg, iov, cnt);
}
-EXPORT int
-vfu_dma_read(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data)
+static int
+vfu_dma_transfer(vfu_ctx_t *vfu_ctx, enum vfio_user_command cmd,
+ dma_sg_t *sg, void *data)
{
- struct vfio_user_dma_region_access *dma_recv;
- struct vfio_user_dma_region_access dma_send;
- uint64_t recv_size;
- int msg_id = 1, ret;
+ struct vfio_user_dma_region_access *dma_reply;
+ struct vfio_user_dma_region_access *dma_req;
+ struct vfio_user_dma_region_access dma;
+ static int msg_id = 1;
+ size_t remaining;
+ size_t count;
+ size_t rlen;
+ void *rbuf;
assert(vfu_ctx != NULL);
assert(sg != NULL);
- recv_size = sizeof(*dma_recv) + sg->length;
+ rlen = sizeof(struct vfio_user_dma_region_access) +
+ MIN(sg->length, vfu_ctx->client_max_data_xfer_size);
+
+ rbuf = calloc(1, rlen);
- dma_recv = calloc(recv_size, 1);
- if (dma_recv == NULL) {
+ if (rbuf == NULL) {
return -1;
}
- dma_send.addr = (uint64_t)sg->dma_addr;
- dma_send.count = sg->length;
- ret = vfu_ctx->tran->send_msg(vfu_ctx, msg_id, VFIO_USER_DMA_READ,
- &dma_send, sizeof(dma_send), NULL,
- dma_recv, recv_size);
+ remaining = sg->length;
+ count = 0;
- if (ret < 0) {
- if (errno == ENOMSG) {
- vfu_reset_ctx(vfu_ctx, "closed");
- errno = ENOTCONN;
- } else if (errno == ECONNRESET) {
- vfu_reset_ctx(vfu_ctx, "reset");
- errno = ENOTCONN;
- }
+ if (cmd == VFIO_USER_DMA_READ) {
+ dma_req = &dma;
+ dma_reply = rbuf;
} else {
- /* FIXME no need for memcpy */
- memcpy(data, dma_recv->data, sg->length);
+ dma_req = rbuf;
+ dma_reply = &dma;
}
- free(dma_recv);
+ while (remaining > 0) {
+ int ret;
- return ret;
-}
+ dma_req->addr = (uint64_t)sg->dma_addr + count;
+ dma_req->count = MIN(remaining, vfu_ctx->client_max_data_xfer_size);
-EXPORT int
-vfu_dma_write(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data)
-{
- struct vfio_user_dma_region_access *dma_send, dma_recv;
- uint64_t send_size = sizeof(*dma_send) + sg->length;
- int msg_id = 1, ret;
+ if (cmd == VFIO_USER_DMA_WRITE) {
+ memcpy(rbuf + sizeof(*dma_req), data + count, dma_req->count);
- assert(vfu_ctx != NULL);
- assert(sg != NULL);
+ ret = vfu_ctx->tran->send_msg(vfu_ctx, msg_id++, VFIO_USER_DMA_WRITE,
+ rbuf, rlen, NULL,
+ dma_reply, sizeof(*dma_reply));
+ } else {
+ ret = vfu_ctx->tran->send_msg(vfu_ctx, msg_id++, VFIO_USER_DMA_READ,
+ dma_req, sizeof(*dma_req), NULL,
+ rbuf, rlen);
+ }
- dma_send = calloc(send_size, 1);
- if (dma_send == NULL) {
- return -1;
- }
- dma_send->addr = (uint64_t)sg->dma_addr;
- dma_send->count = sg->length;
- memcpy(dma_send->data, data, sg->length); /* FIXME no need to copy! */
- ret = vfu_ctx->tran->send_msg(vfu_ctx, msg_id, VFIO_USER_DMA_WRITE,
- dma_send, send_size, NULL,
- &dma_recv, sizeof(dma_recv));
+ if (ret < 0) {
+ ret = errno;
+ if (ret == ENOMSG) {
+ vfu_reset_ctx(vfu_ctx, "closed");
+ ret = ENOTCONN;
+ } else if (errno == ECONNRESET) {
+ vfu_reset_ctx(vfu_ctx, "reset");
+ ret = ENOTCONN;
+ }
+ free(rbuf);
+ return ERROR_INT(ret);
+ }
- if (ret < 0) {
- if (errno == ENOMSG) {
- vfu_reset_ctx(vfu_ctx, "closed");
- errno = ENOTCONN;
- } else if (errno == ECONNRESET) {
- vfu_reset_ctx(vfu_ctx, "reset");
- errno = ENOTCONN;
+ if (dma_reply->addr != dma_req->addr ||
+ dma_reply->count != dma_req->count) {
+ vfu_log(vfu_ctx, LOG_ERR, "bad reply to DMA transfer: "
+ "request:%#lx,%lu reply:%#lx,%lu",
+ dma_req->addr, dma_req->count,
+ dma_reply->addr, dma_reply->count);
+ free(rbuf);
+ return ERROR_INT(EINVAL);
}
+
+ if (cmd == VFIO_USER_DMA_READ) {
+ memcpy(data + count, rbuf + sizeof(*dma_reply), dma_req->count);
+ }
+
+ count += dma_req->count;
+ remaining -= dma_req->count;
}
- free(dma_send);
+ free(rbuf);
+ return 0;
+}
- return ret;
+EXPORT int
+vfu_dma_read(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data)
+{
+ return vfu_dma_transfer(vfu_ctx, VFIO_USER_DMA_READ, sg, data);
+}
+
+EXPORT int
+vfu_dma_write(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data)
+{
+ return vfu_dma_transfer(vfu_ctx, VFIO_USER_DMA_WRITE, sg, data);
}
/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/lib/private.h b/lib/private.h
index c7c0627..f474e1f 100644
--- a/lib/private.h
+++ b/lib/private.h
@@ -37,6 +37,7 @@
#include "common.h"
#include "pci_caps.h"
+#include "vfio-user.h"
/*
* The main reason we limit the size of an individual DMA region from the client
@@ -46,7 +47,14 @@
#define MAX_DMA_SIZE (8 * ONE_TB)
#define MAX_DMA_REGIONS 16
-#define SERVER_MAX_MSG_SIZE 65536
+#define SERVER_MAX_DATA_XFER_SIZE (VFIO_USER_DEFAULT_MAX_DATA_XFER_SIZE)
+
+/*
+ * Enough to receive a VFIO_USER_REGION_WRITE of SERVER_MAX_DATA_XFER_SIZE.
+ */
+#define SERVER_MAX_MSG_SIZE (SERVER_MAX_DATA_XFER_SIZE + \
+ sizeof(struct vfio_user_header) + \
+ sizeof(struct vfio_user_region_access))
/*
* Structure used to hold an in-flight request+reply.
@@ -155,6 +163,7 @@ struct vfu_ctx {
vfu_dma_unregister_cb_t *dma_unregister;
int client_max_fds;
+ size_t client_max_data_xfer_size;
struct migration *migration;
diff --git a/lib/tran_sock.c b/lib/tran_sock.c
index 12b3321..f8b7004 100644
--- a/lib/tran_sock.c
+++ b/lib/tran_sock.c
@@ -63,7 +63,7 @@ tran_sock_send_iovec(int sock, uint16_t msg_id, bool is_reply,
int *fds, int count, int err)
{
int ret;
- struct vfio_user_header hdr = {.msg_id = msg_id};
+ struct vfio_user_header hdr = { .msg_id = msg_id };
struct msghdr msg;
size_t i;
size_t size = count * sizeof(*fds);
@@ -250,6 +250,10 @@ tran_sock_recv_fds(int sock, struct vfio_user_header *hdr, bool is_reply,
}
}
+ if (hdr->msg_size < sizeof(*hdr) || hdr->msg_size > SERVER_MAX_MSG_SIZE) {
+ return ERROR_INT(EINVAL);
+ }
+
if (len != NULL && *len > 0 && hdr->msg_size > sizeof(*hdr)) {
ret = recv(sock, data, MIN(hdr->msg_size - sizeof(*hdr), *len),
MSG_WAITALL);
@@ -276,8 +280,6 @@ tran_sock_recv(int sock, struct vfio_user_header *hdr, bool is_reply,
/*
* Like tran_sock_recv(), but will automatically allocate reply data.
- *
- * FIXME: this does an unconstrained alloc of client-supplied data.
*/
int
tran_sock_recv_alloc(int sock, struct vfio_user_header *hdr, bool is_reply,
@@ -294,6 +296,7 @@ tran_sock_recv_alloc(int sock, struct vfio_user_header *hdr, bool is_reply,
}
assert(hdr->msg_size >= sizeof(*hdr));
+ assert(hdr->msg_size <= SERVER_MAX_MSG_SIZE);
len = hdr->msg_size - sizeof(*hdr);
@@ -464,6 +467,7 @@ tran_sock_get_poll_fd(vfu_ctx_t *vfu_ctx)
* {
* "capabilities": {
* "max_msg_fds": 32,
+ * "max_data_xfer_size": 1048576
* "migration": {
* "pgsize": 4096
* }
@@ -474,8 +478,8 @@ tran_sock_get_poll_fd(vfu_ctx_t *vfu_ctx)
* available in newer library versions, so we don't use it.
*/
int
-tran_parse_version_json(const char *json_str,
- int *client_max_fdsp, size_t *pgsizep)
+tran_parse_version_json(const char *json_str, int *client_max_fdsp,
+ size_t *client_max_data_xfer_sizep, size_t *pgsizep)
{
struct json_object *jo_caps = NULL;
struct json_object *jo_top = NULL;
@@ -508,6 +512,18 @@ tran_parse_version_json(const char *json_str,
}
}
+ if (json_object_object_get_ex(jo_caps, "max_data_xfer_size", &jo)) {
+ if (json_object_get_type(jo) != json_type_int) {
+ goto out;
+ }
+
+ errno = 0;
+ *client_max_data_xfer_sizep = (int)json_object_get_int64(jo);
+
+ if (errno != 0) {
+ goto out;
+ }
+ }
if (json_object_object_get_ex(jo_caps, "migration", &jo)) {
struct json_object *jo2 = NULL;
@@ -581,6 +597,7 @@ recv_version(vfu_ctx_t *vfu_ctx, int sock, uint16_t *msg_idp,
}
vfu_ctx->client_max_fds = 1;
+ vfu_ctx->client_max_data_xfer_size = VFIO_USER_DEFAULT_MAX_DATA_XFER_SIZE;
if (vlen > sizeof(*cversion)) {
const char *json_str = (const char *)cversion->data;
@@ -594,6 +611,7 @@ recv_version(vfu_ctx_t *vfu_ctx, int sock, uint16_t *msg_idp,
}
ret = tran_parse_version_json(json_str, &vfu_ctx->client_max_fds,
+ &vfu_ctx->client_max_data_xfer_size,
&pgsize);
if (ret < 0) {
@@ -656,20 +674,20 @@ send_version(vfu_ctx_t *vfu_ctx, int sock, uint16_t msg_id,
"{"
"\"capabilities\":{"
"\"max_msg_fds\":%u,"
- "\"max_msg_size\":%u"
+ "\"max_data_xfer_size\":%u"
"}"
- "}", SERVER_MAX_FDS, SERVER_MAX_MSG_SIZE);
+ "}", SERVER_MAX_FDS, SERVER_MAX_DATA_XFER_SIZE);
} else {
slen = snprintf(server_caps, sizeof(server_caps),
"{"
"\"capabilities\":{"
"\"max_msg_fds\":%u,"
- "\"max_msg_size\":%u,"
+ "\"max_data_xfer_size\":%u,"
"\"migration\":{"
"\"pgsize\":%zu"
"}"
"}"
- "}", SERVER_MAX_FDS, SERVER_MAX_MSG_SIZE,
+ "}", SERVER_MAX_FDS, SERVER_MAX_DATA_XFER_SIZE,
migration_get_pgsize(vfu_ctx->migration));
}
@@ -787,6 +805,8 @@ tran_sock_recv_body(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg)
ts = vfu_ctx->tran_data;
+ assert(msg->in_size <= SERVER_MAX_MSG_SIZE);
+
msg->in_data = malloc(msg->in_size);
if (msg->in_data == NULL) {
diff --git a/lib/tran_sock.h b/lib/tran_sock.h
index dbf54b1..efc5d60 100644
--- a/lib/tran_sock.h
+++ b/lib/tran_sock.h
@@ -54,7 +54,7 @@ extern struct transport_ops tran_sock_ops;
*/
int
tran_parse_version_json(const char *json_str, int *client_max_fdsp,
- size_t *pgsizep);
+ size_t *client_max_data_xfer_sizep, size_t *pgsizep);
/*
* Send a message to the other end. The iovecs array should leave the first
diff --git a/samples/client.c b/samples/client.c
index 12eee21..13880c5 100644
--- a/samples/client.c
+++ b/samples/client.c
@@ -51,6 +51,9 @@
#define CLIENT_MAX_FDS (32)
+/* This is low, so we get testing of vfu_dma_read/write() chunking. */
+#define CLIENT_MAX_DATA_XFER_SIZE (1024)
+
static char *irq_to_str[] = {
[VFU_DEV_INTX_IRQ] = "INTx",
[VFU_DEV_MSI_IRQ] = "MSI",
@@ -105,11 +108,12 @@ send_version(int sock)
"{"
"\"capabilities\":{"
"\"max_msg_fds\":%u,"
+ "\"max_data_xfer_size\":%u,"
"\"migration\":{"
"\"pgsize\":%zu"
"}"
"}"
- "}", CLIENT_MAX_FDS, sysconf(_SC_PAGESIZE));
+ "}", CLIENT_MAX_FDS, CLIENT_MAX_DATA_XFER_SIZE, sysconf(_SC_PAGESIZE));
cversion.major = LIB_VFIO_USER_MAJOR;
cversion.minor = LIB_VFIO_USER_MINOR;
@@ -130,7 +134,8 @@ send_version(int sock)
}
static void
-recv_version(int sock, int *server_max_fds, size_t *pgsize)
+recv_version(int sock, int *server_max_fds, size_t *server_max_data_xfer_size,
+ size_t *pgsize)
{
struct vfio_user_version *sversion = NULL;
struct vfio_user_header hdr;
@@ -167,6 +172,7 @@ recv_version(int sock, int *server_max_fds, size_t *pgsize)
}
*server_max_fds = 1;
+ *server_max_data_xfer_size = VFIO_USER_DEFAULT_MAX_DATA_XFER_SIZE;
*pgsize = sysconf(_SC_PAGESIZE);
if (vlen > sizeof(*sversion)) {
@@ -177,7 +183,8 @@ recv_version(int sock, int *server_max_fds, size_t *pgsize)
errx(EXIT_FAILURE, "ignoring invalid JSON from server");
}
- ret = tran_parse_version_json(json_str, server_max_fds, pgsize);
+ ret = tran_parse_version_json(json_str, server_max_fds,
+ server_max_data_xfer_size, pgsize);
if (ret < 0) {
err(EXIT_FAILURE, "failed to parse server JSON \"%s\"", json_str);
@@ -188,10 +195,11 @@ recv_version(int sock, int *server_max_fds, size_t *pgsize)
}
static void
-negotiate(int sock, int *server_max_fds, size_t *pgsize)
+negotiate(int sock, int *server_max_fds, size_t *server_max_data_xfer_size,
+ size_t *pgsize)
{
send_version(sock);
- recv_version(sock, server_max_fds, pgsize);
+ recv_version(sock, server_max_fds, server_max_data_xfer_size, pgsize);
}
static void
@@ -562,21 +570,27 @@ handle_dma_write(int sock, struct vfio_user_dma_map *dma_regions,
}
for (i = 0; i < nr_dma_regions; i++) {
- if (dma_regions[i].addr == dma_access.addr) {
- ret = pwrite(dma_region_fds[i], data, dma_access.count,
- dma_regions[i].offset);
- if (ret < 0) {
- err(EXIT_FAILURE,
- "failed to write data to fd=%d at %#lx-%#lx",
- dma_region_fds[i],
- dma_regions[i].offset,
- dma_regions[i].offset + dma_access.count - 1);
- }
- break;
- }
+ off_t offset;
+ ssize_t c;
+
+ if (dma_access.addr < dma_regions[i].addr ||
+ dma_access.addr >= dma_regions[i].addr + dma_regions[i].size) {
+ continue;
+ }
+
+ offset = dma_regions[i].offset + dma_access.addr;
+
+ c = pwrite(dma_region_fds[i], data, dma_access.count, offset);
+
+ if (c != (ssize_t)dma_access.count) {
+ err(EXIT_FAILURE, "failed to write to fd=%d at [%#lx-%#lx)",
+ dma_region_fds[i], offset, offset + dma_access.count);
+ }
+ break;
}
- dma_access.count = 0;
+ assert(i != nr_dma_regions);
+
ret = tran_sock_send(sock, msg_id, true, VFIO_USER_DMA_WRITE,
&dma_access, sizeof(dma_access));
if (ret < 0) {
@@ -606,24 +620,36 @@ handle_dma_read(int sock, struct vfio_user_dma_map *dma_regions,
if (response == NULL) {
err(EXIT_FAILURE, NULL);
}
+ response->addr = dma_access.addr;
response->count = dma_access.count;
data = (char *)response->data;
for (i = 0; i < nr_dma_regions; i++) {
- if (dma_regions[i].addr == dma_access.addr) {
- if (pread(dma_region_fds[i], data, dma_access.count, dma_regions[i].offset) == -1) {
- err(EXIT_FAILURE, "failed to write data at %#lx-%#lx",
- dma_regions[i].offset,
- dma_regions[i].offset + dma_access.count);
- }
- break;
- }
+ off_t offset;
+ ssize_t c;
+
+ if (dma_access.addr < dma_regions[i].addr ||
+ dma_access.addr >= dma_regions[i].addr + dma_regions[i].size) {
+ continue;
+ }
+
+ offset = dma_regions[i].offset + dma_access.addr;
+
+ c = pread(dma_region_fds[i], data, dma_access.count, offset);
+
+ if (c != (ssize_t)dma_access.count) {
+ err(EXIT_FAILURE, "failed to read from fd=%d at [%#lx-%#lx)",
+ dma_region_fds[i], offset, offset + dma_access.count);
+ }
+ break;
}
+ assert(i != nr_dma_regions);
+
ret = tran_sock_send(sock, msg_id, true, VFIO_USER_DMA_READ,
response, response_sz);
if (ret < 0) {
- err(EXIT_FAILURE, "failed to send reply of DMA write");
+ err(EXIT_FAILURE, "failed to send reply of DMA read");
}
free(response);
}
@@ -632,8 +658,14 @@ static void
handle_dma_io(int sock, struct vfio_user_dma_map *dma_regions,
int nr_dma_regions, int *dma_region_fds)
{
- handle_dma_write(sock, dma_regions, nr_dma_regions, dma_region_fds);
- handle_dma_read(sock, dma_regions, nr_dma_regions, dma_region_fds);
+ size_t i;
+
+ for (i = 0; i < 4096 / CLIENT_MAX_DATA_XFER_SIZE; i++) {
+ handle_dma_write(sock, dma_regions, nr_dma_regions, dma_region_fds);
+ }
+ for (i = 0; i < 4096 / CLIENT_MAX_DATA_XFER_SIZE; i++) {
+ handle_dma_read(sock, dma_regions, nr_dma_regions, dma_region_fds);
+ }
}
static void
@@ -678,12 +710,6 @@ get_dirty_bitmap(int sock, struct vfio_user_dma_map *dma_region)
free(data);
}
-enum migration {
- NO_MIGRATION,
- MIGRATION_SOURCE,
- MIGRATION_DESTINATION,
-};
-
static void
usage(char *argv0)
{
@@ -891,8 +917,9 @@ migrate_from(int sock, size_t *nr_iters, struct iovec **migr_iters,
static int
migrate_to(char *old_sock_path, int *server_max_fds,
- size_t *pgsize, size_t nr_iters, struct iovec *migr_iters,
- char *path_to_server, unsigned char *src_md5sum, size_t bar1_size)
+ size_t *server_max_data_xfer_size, size_t *pgsize, size_t nr_iters,
+ struct iovec *migr_iters, char *path_to_server,
+ unsigned char *src_md5sum, size_t bar1_size)
{
int ret, sock;
char *sock_path;
@@ -947,7 +974,7 @@ migrate_to(char *old_sock_path, int *server_max_fds,
sock = init_sock(sock_path);
free(sock_path);
- negotiate(sock, server_max_fds, pgsize);
+ negotiate(sock, server_max_fds, server_max_data_xfer_size, pgsize);
/* XXX set device state to resuming */
ret = access_region(sock, VFU_PCI_DEV_MIGR_REGION_IDX, true,
@@ -1060,6 +1087,7 @@ int main(int argc, char *argv[])
int i;
FILE *fp;
int server_max_fds;
+ size_t server_max_data_xfer_size;
size_t pgsize;
int nr_dma_regions;
struct vfio_user_dirty_pages dirty_pages = {0};
@@ -1095,7 +1123,7 @@ int main(int argc, char *argv[])
*
* Do intial negotiation with the server, and discover parameters.
*/
- negotiate(sock, &server_max_fds, &pgsize);
+ negotiate(sock, &server_max_fds, &server_max_data_xfer_size, &pgsize);
/* try to access a bogus region, we should get an error */
ret = access_region(sock, 0xdeadbeef, false, 0, &ret, sizeof(ret));
@@ -1240,8 +1268,8 @@ int main(int argc, char *argv[])
err(EXIT_FAILURE, "failed to asprintf");
}
- sock = migrate_to(argv[optind], &server_max_fds, &pgsize,
- nr_iters, migr_iters, path_to_server,
+ sock = migrate_to(argv[optind], &server_max_fds, &server_max_data_xfer_size,
+ &pgsize, nr_iters, migr_iters, path_to_server,
md5sum, bar1_size);
free(path_to_server);
for (i = 0; i < (int)nr_iters; i++) {
diff --git a/test/py/libvfio_user.py b/test/py/libvfio_user.py
index b1ebd4b..bdb812a 100644
--- a/test/py/libvfio_user.py
+++ b/test/py/libvfio_user.py
@@ -106,10 +106,13 @@ VFIO_USER_CLIENT_MAX_FDS_LIMIT = 1024
SERVER_MAX_FDS = 8
-SERVER_MAX_MSG_SIZE = 65536
+ONE_TB = (1024 * 1024 * 1024 * 1024)
+
+VFIO_USER_DEFAULT_MAX_DATA_XFER_SIZE = (1024 * 1024)
+SERVER_MAX_DATA_XFER_SIZE = VFIO_USER_DEFAULT_MAX_DATA_XFER_SIZE
+SERVER_MAX_MSG_SIZE = SERVER_MAX_DATA_XFER_SIZE + 16 + 16
MAX_DMA_REGIONS = 16
-ONE_TB = (1024 * 1024 * 1024 * 1024)
MAX_DMA_SIZE = (8 * ONE_TB)
# enum vfio_user_command
diff --git a/test/py/test_negotiate.py b/test/py/test_negotiate.py
index e05b450..a3f2a8a 100644
--- a/test/py/test_negotiate.py
+++ b/test/py/test_negotiate.py
@@ -66,6 +66,15 @@ def test_short_write():
vfu_attach_ctx(ctx, expect=errno.EINVAL)
get_reply(sock, expect=errno.EINVAL)
+def test_long_write():
+ sock = connect_sock()
+ hdr = vfio_user_header(VFIO_USER_VERSION, size=SERVER_MAX_MSG_SIZE + 1)
+ sock.send(hdr)
+
+ ret = vfu_attach_ctx(ctx, expect=errno.EINVAL)
+ assert ret == -1
+ assert c.get_errno() == errno.EINVAL
+
def test_bad_command():
sock = connect_sock()
@@ -152,7 +161,7 @@ def test_valid_negotiate_no_json():
assert minor == LIBVFIO_USER_MINOR
json = parse_json(json_str)
assert json.capabilities.max_msg_fds == SERVER_MAX_FDS
- assert json.capabilities.max_msg_size == SERVER_MAX_MSG_SIZE
+ assert json.capabilities.max_data_xfer_size == SERVER_MAX_DATA_XFER_SIZE
# FIXME: migration object checks
disconnect_client(ctx, sock)
@@ -164,8 +173,10 @@ def test_valid_negotiate_empty_json():
vfu_run_ctx(ctx)
def test_valid_negotiate_json():
- client_version_json(json=bytes('{ "capabilities": { "max_msg_fds": %s } }' %
- VFIO_USER_CLIENT_MAX_FDS_LIMIT, "utf-8"))
+ client_version_json(json=bytes(
+ '{ "capabilities": { "max_msg_fds": %s, "max_data_xfer_size": %u } }' %
+ (VFIO_USER_CLIENT_MAX_FDS_LIMIT, VFIO_USER_DEFAULT_MAX_DATA_XFER_SIZE),
+ "utf-8"))
# notice client closed connection
vfu_run_ctx(ctx)