author     John Levon <john.levon@nutanix.com>    2021-06-02 16:08:14 +0100
committer  GitHub <noreply@github.com>            2021-06-02 16:08:14 +0100
commit     57684de8240fce4a277301a86a803842338762af
tree       ea1066e2ae4de34bd7b77f9fb7a26af40848b52f
parent     b8234a75d9ec2c95cb889c0cef27927f34ad9cbc
replace max_msg_size with max_data_xfer_size (#541)
The previously specified max_msg_size had one major issue: it implied a (far
too small) limit on the size of dirty bitmaps that a client could request, and
as a result a hard limit on memory region size. Attempting to split up an
unmap request instead seemed awkward.

Instead, let most requests and replies be limited by their "natural" limits;
for example, the number of booleans in VFIO_USER_SET_IRQS is limited by the
MSI-X count. For the requests that solicit or provide data - that is,
VFIO_USER_DMA_READ/WRITE and VFIO_USER_REGION_READ/WRITE - we negotiate a new
max_data_xfer_size value. These are much easier to split into separate
requests on the client side, so they should not present an implementation
problem. For our server, chunking is implemented in vfu_dma_read() and
vfu_dma_write().
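As a rough illustration of the chunking idea (a sketch only, with hypothetical
names - the real logic lives in vfu_dma_transfer() in this change):

    /*
     * Illustrative sketch, not the library implementation: split a transfer
     * into messages of at most the negotiated max_data_xfer_size. All names
     * here are hypothetical.
     */
    #include <stddef.h>
    #include <stdint.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    /* Stand-in for one VFIO_USER_DMA_READ request/reply round trip. */
    typedef int (*xfer_one_fn)(uint64_t addr, void *buf, size_t count);

    static int
    chunked_read(xfer_one_fn xfer_one, uint64_t addr, void *data,
                 size_t length, size_t max_data_xfer_size)
    {
        size_t done = 0;

        while (done < length) {
            /* Each message carries at most max_data_xfer_size bytes. */
            size_t count = MIN(length - done, max_data_xfer_size);

            if (xfer_one(addr + done, (char *)data + done, count) < 0) {
                return -1;
            }
            done += count;
        }
        return 0;
    }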
Signed-off-by: John Levon <john.levon@nutanix.com>
Reviewed-by: Swapnil Ingle <swapnil.ingle@nutanix.com>
Reviewed-by: Thanos Makatos <thanos.makatos@nutanix.com>
-rw-r--r--  docs/vfio-user.rst         167
-rw-r--r--  include/vfio-user.h          2
-rw-r--r--  lib/libvfio-user.c         145
-rw-r--r--  lib/private.h               11
-rw-r--r--  lib/tran_sock.c             38
-rw-r--r--  lib/tran_sock.h              2
-rw-r--r--  samples/client.c           110
-rw-r--r--  test/py/libvfio_user.py      7
-rw-r--r--  test/py/test_negotiate.py   17
9 files changed, 304 insertions(+), 195 deletions(-)
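For context before the diff: the negotiated capability travels as JSON in the
VFIO_USER_VERSION payload. A minimal, compilable sketch of composing it, in
the same snprintf() style as the send_version() change below (the fd count and
transfer size here are example values, not mandated ones):

    #include <stdio.h>

    int
    main(void)
    {
        char caps[128];

        /* Compose the capabilities JSON advertised during version
         * negotiation; 8 fds and 1 MiB are example values. */
        snprintf(caps, sizeof(caps),
                 "{"
                     "\"capabilities\":{"
                         "\"max_msg_fds\":%u,"
                         "\"max_data_xfer_size\":%u"
                     "}"
                 "}", 8u, 1024u * 1024u);

        /* {"capabilities":{"max_msg_fds":8,"max_data_xfer_size":1048576}} */
        puts(caps);
        return 0;
    }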
diff --git a/docs/vfio-user.rst b/docs/vfio-user.rst
index e801859..a5d2b43 100644
--- a/docs/vfio-user.rst
+++ b/docs/vfio-user.rst
@@ -317,9 +317,8 @@ Message sizes
 
 Some requests have an ``argsz`` field. In a request, it defines the maximum
 expected reply payload size, which should be at least the size of the fixed
-reply payload headers defined here. For messages that don't have a reply
-payload, it defines the size of the incoming request (not including the standard
-header); otherwise it's not related to the request message size.
+reply payload headers defined here. The *request* payload size is defined by the
+usual ``msg_size`` field in the header, not the ``argsz`` field.
 
 In a reply, the server sets ``argsz`` field to the size needed for a full
 payload size. This may be less than the requested maximum size. This may be
@@ -328,6 +327,11 @@ included in the reply, but the ``argsz`` field in the reply indicates the
 needed size, allowing a client to allocate a larger buffer for holding the
 reply before trying again.
 
+In addition, during negotiation (see `Version`_), the client and server may
+each specify a ``max_data_xfer_size`` value; this defines the maximum data that
+may be read or written via one of the ``VFIO_USER_DMA/REGION_READ/WRITE``
+messages; see `Read and Write Operations`_.
+
 Protocol Specification
 ======================
 
@@ -475,43 +479,39 @@ version data 4 variable (including terminating NUL). Optional.
 The version data is an optional UTF-8 encoded JSON byte array with the following
 format:
 
-+--------------------+------------------+-----------------------------------+
-| Name               | Type             | Description                       |
-+====================+==================+===================================+
-| ``"capabilities"`` | collection of    | Contains common capabilities that |
-|                    | name/value pairs | the sender supports. Optional.    |
-+--------------------+------------------+-----------------------------------+
++--------------+--------+-----------------------------------+
+| Name         | Type   | Description                       |
++==============+========+===================================+
+| capabilities | object | Contains common capabilities that |
+|              |        | the sender supports. Optional.    |
++--------------+--------+-----------------------------------+
 
 Capabilities:
 
-+--------------------+------------------+-------------------------------------+
-| Name               | Type             | Description                         |
-+====================+==================+=====================================+
-| ``"max_msg_fds"``  | number           | Maximum number of file descriptors  |
-|                    |                  | that can be received by the sender  |
-|                    |                  | in one message. Optional. If not    |
-|                    |                  | specified then the receiver must    |
-|                    |                  | assume ``"max_msg_fds"=1``.         |
-+--------------------+------------------+-------------------------------------+
-| ``"max_msg_size"`` | number           | Maximum message size in bytes that  |
-|                    |                  | the receiver can handle, including  |
-|                    |                  | the header. Optional. If not        |
-|                    |                  | specified then the receiver must    |
-|                    |                  | assume ``"max_msg_size"=4096``.     |
-+--------------------+------------------+-------------------------------------+
-| ``"migration"``    | collection of    | Migration capability parameters. If |
-|                    | name/value pairs | missing then migration is not       |
-|                    |                  | supported by the sender.            |
-+--------------------+------------------+-------------------------------------+
++--------------------+--------+------------------------------------------------+
+| Name               | Type   | Description                                    |
++====================+========+================================================+
+| max_msg_fds        | number | Maximum number of file descriptors that can be |
+|                    |        | received by the sender in one message.         |
+|                    |        | Optional. If not specified then the receiver   |
+|                    |        | must assume a value of ``1``.                  |
++--------------------+--------+------------------------------------------------+
+| max_data_xfer_size | number | Maximum ``count`` for data transfer messages;  |
+|                    |        | see `Read and Write Operations`_. Optional,    |
+|                    |        | with a default value of 1048576 bytes.         |
++--------------------+--------+------------------------------------------------+
+| migration          | object | Migration capability parameters. If missing    |
+|                    |        | then migration is not supported by the sender. |
++--------------------+--------+------------------------------------------------+
 
 The migration capability contains the following name/value pairs:
 
-+--------------+--------+-----------------------------------------------+
-| Name         | Type   | Description                                   |
-+==============+========+===============================================+
-| ``"pgsize"`` | number | Page size of dirty pages bitmap. The smallest |
-|              |        | between the client and the server is used.    |
-+--------------+--------+-----------------------------------------------+
++--------+--------+-----------------------------------------------+
+| Name   | Type   | Description                                   |
++========+========+===============================================+
+| pgsize | number | Page size of dirty pages bitmap. The smallest |
+|        |        | between the client and the server is used.    |
++--------+--------+-----------------------------------------------+
 
 Reply
 ^^^^^
@@ -1302,6 +1302,9 @@ There is no payload in the reply.
 Note that all of these operations must be supported by the client and/or server,
 even if the corresponding memory or device region has been shared as mappable.
 
+The ``count`` field must not exceed the value of ``max_data_xfer_size`` of the
+peer, for both reads and writes.
+
 ``VFIO_USER_REGION_READ``
 -------------------------
 
@@ -1315,16 +1318,16 @@ Request
 +--------+--------+----------+
 | Name   | Offset | Size     |
 +========+========+==========+
-| Offset | 0      | 8        |
+| offset | 0      | 8        |
 +--------+--------+----------+
-| Region | 8      | 4        |
+| region | 8      | 4        |
 +--------+--------+----------+
-| Count  | 12     | 4        |
+| count  | 12     | 4        |
 +--------+--------+----------+
 
-* *Offset* into the region being accessed.
-* *Region* is the index of the region being accessed.
-* *Count* is the size of the data to be transferred.
+* *offset* into the region being accessed.
+* *region* is the index of the region being accessed.
+* *count* is the size of the data to be transferred.
 
 Reply
 ^^^^^
@@ -1332,19 +1335,19 @@ Reply
 +--------+--------+----------+
 | Name   | Offset | Size     |
 +========+========+==========+
-| Offset | 0      | 8        |
+| offset | 0      | 8        |
 +--------+--------+----------+
-| Region | 8      | 4        |
+| region | 8      | 4        |
 +--------+--------+----------+
-| Count  | 12     | 4        |
+| count  | 12     | 4        |
 +--------+--------+----------+
-| Data   | 16     | variable |
+| data   | 16     | variable |
 +--------+--------+----------+
 
-* *Offset* into the region accessed.
-* *Region* is the index of the region accessed.
-* *Count* is the size of the data transferred.
-* *Data* is the data that was read from the device region.
+* *offset* into the region accessed.
+* *region* is the index of the region accessed.
+* *count* is the size of the data transferred.
+* *data* is the data that was read from the device region.
 
 ``VFIO_USER_REGION_WRITE``
 --------------------------
@@ -1359,19 +1362,19 @@ Request
 +--------+--------+----------+
 | Name   | Offset | Size     |
 +========+========+==========+
-| Offset | 0      | 8        |
+| offset | 0      | 8        |
 +--------+--------+----------+
-| Region | 8      | 4        |
+| region | 8      | 4        |
 +--------+--------+----------+
-| Count  | 12     | 4        |
+| count  | 12     | 4        |
 +--------+--------+----------+
-| Data   | 16     | variable |
+| data   | 16     | variable |
 +--------+--------+----------+
 
-* *Offset* into the region being accessed.
-* *Region* is the index of the region being accessed.
-* *Count* is the size of the data to be transferred.
-* *Data* is the data to write
+* *offset* into the region being accessed.
+* *region* is the index of the region being accessed.
+* *count* is the size of the data to be transferred.
+* *data* is the data to write
 
 Reply
 ^^^^^
@@ -1379,16 +1382,16 @@ Reply
 +--------+--------+----------+
 | Name   | Offset | Size     |
 +========+========+==========+
-| Offset | 0      | 8        |
+| offset | 0      | 8        |
 +--------+--------+----------+
-| Region | 8      | 4        |
+| region | 8      | 4        |
 +--------+--------+----------+
-| Count  | 12     | 4        |
+| count  | 12     | 4        |
 +--------+--------+----------+
 
-* *Offset* into the region accessed.
-* *Region* is the index of the region accessed.
-* *Count* is the size of the data transferred.
+* *offset* into the region accessed.
+* *region* is the index of the region accessed.
+* *count* is the size of the data transferred.
 
 ``VFIO_USER_DMA_READ``
 -----------------------
@@ -1402,14 +1405,14 @@ Request
 +---------+--------+----------+
 | Name    | Offset | Size     |
 +=========+========+==========+
-| Address | 0      | 8        |
+| address | 0      | 8        |
 +---------+--------+----------+
-| Count   | 8      | 8        |
+| count   | 8      | 8        |
 +---------+--------+----------+
 
-* *Address* is the client DMA memory address being accessed. This address must have
+* *address* is the client DMA memory address being accessed. This address must have
   been previously exported to the server with a ``VFIO_USER_DMA_MAP`` message.
-* *Count* is the size of the data to be transferred.
+* *count* is the size of the data to be transferred.
 
 Reply
 ^^^^^
@@ -1417,16 +1420,16 @@ Reply
 +---------+--------+----------+
 | Name    | Offset | Size     |
 +=========+========+==========+
-| Address | 0      | 8        |
+| address | 0      | 8        |
 +---------+--------+----------+
-| Count   | 8      | 8        |
+| count   | 8      | 8        |
 +---------+--------+----------+
-| Data    | 16     | variable |
+| data    | 16     | variable |
 +---------+--------+----------+
 
-* *Address* is the client DMA memory address being accessed.
-* *Count* is the size of the data transferred.
-* *Data* is the data read.
+* *address* is the client DMA memory address being accessed.
+* *count* is the size of the data transferred.
+* *data* is the data read.
 
 ``VFIO_USER_DMA_WRITE``
 -----------------------
@@ -1440,17 +1443,17 @@ Request
 +---------+--------+----------+
 | Name    | Offset | Size     |
 +=========+========+==========+
-| Address | 0      | 8        |
+| address | 0      | 8        |
 +---------+--------+----------+
-| Count   | 8      | 8        |
+| count   | 8      | 8        |
 +---------+--------+----------+
-| Data    | 16     | variable |
+| data    | 16     | variable |
 +---------+--------+----------+
 
-* *Address* is the client DMA memory address being accessed. This address must have
+* *address* is the client DMA memory address being accessed. This address must have
   been previously exported to the server with a ``VFIO_USER_DMA_MAP`` message.
-* *Count* is the size of the data to be transferred.
-* *Data* is the data to write
+* *count* is the size of the data to be transferred.
+* *data* is the data to write
 
 Reply
 ^^^^^
@@ -1458,13 +1461,13 @@ Reply
 +---------+--------+----------+
 | Name    | Offset | Size     |
 +=========+========+==========+
-| Address | 0      | 8        |
+| address | 0      | 8        |
 +---------+--------+----------+
-| Count   | 8      | 4        |
+| count   | 8      | 4        |
 +---------+--------+----------+
 
-* *Address* is the client DMA memory address being accessed.
-* *Count* is the size of the data transferred.
+* *address* is the client DMA memory address being accessed.
+* *count* is the size of the data transferred.
 
 ``VFIO_USER_DEVICE_RESET``
 --------------------------
diff --git a/include/vfio-user.h b/include/vfio-user.h
index 7be7cf7..43f6877 100644
--- a/include/vfio-user.h
+++ b/include/vfio-user.h
@@ -50,6 +50,8 @@ extern "C" {
 #endif
 
+#define VFIO_USER_DEFAULT_MAX_DATA_XFER_SIZE (1024 * 1024)
+
 enum vfio_user_command {
     VFIO_USER_VERSION = 1,
     VFIO_USER_DMA_MAP = 2,
diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c
index 452b15f..62f906a 100644
--- a/lib/libvfio-user.c
+++ b/lib/libvfio-user.c
@@ -252,6 +252,12 @@ is_valid_region_access(vfu_ctx_t *vfu_ctx, size_t size, uint16_t cmd,
         return false;
     }
 
+    if (ra->count > SERVER_MAX_DATA_XFER_SIZE) {
+        vfu_log(vfu_ctx, LOG_ERR, "region access count too large (%u)",
+                ra->count);
+        return false;
+    }
+
     if (cmd == VFIO_USER_REGION_WRITE && size - sizeof(*ra) != ra->count) {
         vfu_log(vfu_ctx, LOG_ERR, "region write count too small: "
                 "expected %lu, got %u", size - sizeof(*ra), ra->count);
@@ -265,7 +271,6 @@ is_valid_region_access(vfu_ctx_t *vfu_ctx, size_t size, uint16_t cmd,
         return false;
     }
 
-    // FIXME: ->count unbounded (alloc), except for region size
     // FIXME: need to audit later for wraparound
     if (ra->offset + ra->count > vfu_ctx->reg_info[index].size) {
         vfu_log(vfu_ctx, LOG_ERR, "out of bounds region access %#lx-%#lx "
@@ -891,7 +896,7 @@ is_valid_header(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg)
     }
 
     if (msg->hdr.msg_size < sizeof(msg->hdr)) {
-        vfu_log(vfu_ctx, LOG_ERR, "msg%#hx: bad size %d in header",
+        vfu_log(vfu_ctx, LOG_ERR, "msg%#hx: bad size %u in header",
                 msg->hdr.msg_id, msg->hdr.msg_size);
         return false;
     } else if (msg->hdr.msg_size == sizeof(msg->hdr) &&
@@ -900,6 +905,11 @@ is_valid_header(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg)
                 msg->hdr.msg_id, msg->hdr.cmd);
         return false;
     } else if (msg->hdr.msg_size > SERVER_MAX_MSG_SIZE) {
+        /*
+         * We know we can reject this: all normal requests shouldn't need this
+         * amount of space, including VFIO_USER_REGION_WRITE, which should be
+         * bound by max_data_xfer_size.
+         */
         vfu_log(vfu_ctx, LOG_ERR, "msg%#hx: size of %u is too large",
                 msg->hdr.msg_id, msg->hdr.msg_size);
         return false;
@@ -1603,82 +1613,105 @@ vfu_unmap_sg(vfu_ctx_t *vfu_ctx, const dma_sg_t *sg, struct iovec *iov, int cnt)
     return dma_unmap_sg(vfu_ctx->dma, sg, iov, cnt);
 }
 
-EXPORT int
-vfu_dma_read(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data)
+static int
+vfu_dma_transfer(vfu_ctx_t *vfu_ctx, enum vfio_user_command cmd,
+                 dma_sg_t *sg, void *data)
 {
-    struct vfio_user_dma_region_access *dma_recv;
-    struct vfio_user_dma_region_access dma_send;
-    uint64_t recv_size;
-    int msg_id = 1, ret;
+    struct vfio_user_dma_region_access *dma_reply;
+    struct vfio_user_dma_region_access *dma_req;
+    struct vfio_user_dma_region_access dma;
+    static int msg_id = 1;
+    size_t remaining;
+    size_t count;
+    size_t rlen;
+    void *rbuf;
 
     assert(vfu_ctx != NULL);
     assert(sg != NULL);
 
-    recv_size = sizeof(*dma_recv) + sg->length;
+    rlen = sizeof(struct vfio_user_dma_region_access) +
+           MIN(sg->length, vfu_ctx->client_max_data_xfer_size);
+
+    rbuf = calloc(1, rlen);
 
-    dma_recv = calloc(recv_size, 1);
-    if (dma_recv == NULL) {
+    if (rbuf == NULL) {
         return -1;
     }
 
-    dma_send.addr = (uint64_t)sg->dma_addr;
-    dma_send.count = sg->length;
-    ret = vfu_ctx->tran->send_msg(vfu_ctx, msg_id, VFIO_USER_DMA_READ,
-                                  &dma_send, sizeof(dma_send), NULL,
-                                  dma_recv, recv_size);
+    remaining = sg->length;
+    count = 0;
 
-    if (ret < 0) {
-        if (errno == ENOMSG) {
-            vfu_reset_ctx(vfu_ctx, "closed");
-            errno = ENOTCONN;
-        } else if (errno == ECONNRESET) {
-            vfu_reset_ctx(vfu_ctx, "reset");
-            errno = ENOTCONN;
-        }
+    if (cmd == VFIO_USER_DMA_READ) {
+        dma_req = &dma;
+        dma_reply = rbuf;
     } else {
-        /* FIXME no need for memcpy */
-        memcpy(data, dma_recv->data, sg->length);
+        dma_req = rbuf;
+        dma_reply = &dma;
     }
 
-    free(dma_recv);
+    while (remaining > 0) {
+        int ret;
 
-    return ret;
-}
+        dma_req->addr = (uint64_t)sg->dma_addr + count;
+        dma_req->count = MIN(remaining, vfu_ctx->client_max_data_xfer_size);
 
-EXPORT int
-vfu_dma_write(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data)
-{
-    struct vfio_user_dma_region_access *dma_send, dma_recv;
-    uint64_t send_size = sizeof(*dma_send) + sg->length;
-    int msg_id = 1, ret;
+        if (cmd == VFIO_USER_DMA_WRITE) {
+            memcpy(rbuf + sizeof(*dma_req), data + count, dma_req->count);
 
-    assert(vfu_ctx != NULL);
-    assert(sg != NULL);
+            ret = vfu_ctx->tran->send_msg(vfu_ctx, msg_id++, VFIO_USER_DMA_WRITE,
+                                          rbuf, rlen, NULL,
+                                          dma_reply, sizeof(*dma_reply));
+        } else {
+            ret = vfu_ctx->tran->send_msg(vfu_ctx, msg_id++, VFIO_USER_DMA_READ,
+                                          dma_req, sizeof(*dma_req), NULL,
+                                          rbuf, rlen);
+        }
 
-    dma_send = calloc(send_size, 1);
-    if (dma_send == NULL) {
-        return -1;
-    }
-    dma_send->addr = (uint64_t)sg->dma_addr;
-    dma_send->count = sg->length;
-    memcpy(dma_send->data, data, sg->length); /* FIXME no need to copy! */
-    ret = vfu_ctx->tran->send_msg(vfu_ctx, msg_id, VFIO_USER_DMA_WRITE,
-                                  dma_send, send_size, NULL,
-                                  &dma_recv, sizeof(dma_recv));
+        if (ret < 0) {
+            ret = errno;
+            if (ret == ENOMSG) {
+                vfu_reset_ctx(vfu_ctx, "closed");
+                ret = ENOTCONN;
+            } else if (errno == ECONNRESET) {
+                vfu_reset_ctx(vfu_ctx, "reset");
+                ret = ENOTCONN;
+            }
+            free(rbuf);
+            return ERROR_INT(ret);
+        }
 
-    if (ret < 0) {
-        if (errno == ENOMSG) {
-            vfu_reset_ctx(vfu_ctx, "closed");
-            errno = ENOTCONN;
-        } else if (errno == ECONNRESET) {
-            vfu_reset_ctx(vfu_ctx, "reset");
-            errno = ENOTCONN;
+        if (dma_reply->addr != dma_req->addr ||
+            dma_reply->count != dma_req->count) {
+            vfu_log(vfu_ctx, LOG_ERR, "bad reply to DMA transfer: "
+                    "request:%#lx,%lu reply:%#lx,%lu",
+                    dma_req->addr, dma_req->count,
+                    dma_reply->addr, dma_reply->count);
+            free(rbuf);
+            return ERROR_INT(EINVAL);
         }
+
+        if (cmd == VFIO_USER_DMA_READ) {
+            memcpy(data + count, rbuf + sizeof(*dma_reply), dma_req->count);
+        }
+
+        count += dma_req->count;
+        remaining -= dma_req->count;
     }
 
-    free(dma_send);
+    free(rbuf);
+    return 0;
+}
 
-    return ret;
+EXPORT int
+vfu_dma_read(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data)
+{
+    return vfu_dma_transfer(vfu_ctx, VFIO_USER_DMA_READ, sg, data);
+}
+
+EXPORT int
+vfu_dma_write(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data)
+{
+    return vfu_dma_transfer(vfu_ctx, VFIO_USER_DMA_WRITE, sg, data);
 }
 
 /* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/lib/private.h b/lib/private.h
index c7c0627..f474e1f 100644
--- a/lib/private.h
+++ b/lib/private.h
@@ -37,6 +37,7 @@
 
 #include "common.h"
 #include "pci_caps.h"
+#include "vfio-user.h"
 
 /*
  * The main reason we limit the size of an individual DMA region from the client
@@ -46,7 +47,14 @@
 #define MAX_DMA_SIZE (8 * ONE_TB)
 #define MAX_DMA_REGIONS 16
 
-#define SERVER_MAX_MSG_SIZE 65536
+#define SERVER_MAX_DATA_XFER_SIZE (VFIO_USER_DEFAULT_MAX_DATA_XFER_SIZE)
+
+/*
+ * Enough to receive a VFIO_USER_REGION_WRITE of SERVER_MAX_DATA_XFER_SIZE.
+ */
+#define SERVER_MAX_MSG_SIZE (SERVER_MAX_DATA_XFER_SIZE + \
+                             sizeof(struct vfio_user_header) + \
+                             sizeof(struct vfio_user_region_access))
 
 /*
  * Structure used to hold an in-flight request+reply.
@@ -155,6 +163,7 @@ struct vfu_ctx {
     vfu_dma_unregister_cb_t *dma_unregister;
 
     int client_max_fds;
+    size_t client_max_data_xfer_size;
 
     struct migration *migration;
 
diff --git a/lib/tran_sock.c b/lib/tran_sock.c
index 12b3321..f8b7004 100644
--- a/lib/tran_sock.c
+++ b/lib/tran_sock.c
@@ -63,7 +63,7 @@ tran_sock_send_iovec(int sock, uint16_t msg_id, bool is_reply,
                      int *fds, int count, int err)
 {
     int ret;
-    struct vfio_user_header hdr = {.msg_id = msg_id};
+    struct vfio_user_header hdr = { .msg_id = msg_id };
     struct msghdr msg;
     size_t i;
     size_t size = count * sizeof(*fds);
@@ -250,6 +250,10 @@ tran_sock_recv_fds(int sock, struct vfio_user_header *hdr, bool is_reply,
         }
     }
 
+    if (hdr->msg_size < sizeof(*hdr) || hdr->msg_size > SERVER_MAX_MSG_SIZE) {
+        return ERROR_INT(EINVAL);
+    }
+
     if (len != NULL && *len > 0 && hdr->msg_size > sizeof(*hdr)) {
         ret = recv(sock, data, MIN(hdr->msg_size - sizeof(*hdr), *len),
                    MSG_WAITALL);
@@ -276,8 +280,6 @@ tran_sock_recv(int sock, struct vfio_user_header *hdr, bool is_reply,
 
 /*
  * Like tran_sock_recv(), but will automatically allocate reply data.
- *
- * FIXME: this does an unconstrained alloc of client-supplied data.
 */
 int
 tran_sock_recv_alloc(int sock, struct vfio_user_header *hdr, bool is_reply,
@@ -294,6 +296,7 @@ tran_sock_recv_alloc(int sock, struct vfio_user_header *hdr, bool is_reply,
     }
 
     assert(hdr->msg_size >= sizeof(*hdr));
+    assert(hdr->msg_size <= SERVER_MAX_MSG_SIZE);
 
     len = hdr->msg_size - sizeof(*hdr);
 
@@ -464,6 +467,7 @@ tran_sock_get_poll_fd(vfu_ctx_t *vfu_ctx)
 * {
 *     "capabilities": {
 *         "max_msg_fds": 32,
+*         "max_data_xfer_size": 1048576
 *         "migration": {
 *             "pgsize": 4096
 *         }
@@ -474,8 +478,8 @@ tran_sock_get_poll_fd(vfu_ctx_t *vfu_ctx)
 * available in newer library versions, so we don't use it.
 */
 int
-tran_parse_version_json(const char *json_str,
-                        int *client_max_fdsp, size_t *pgsizep)
+tran_parse_version_json(const char *json_str, int *client_max_fdsp,
+                        size_t *client_max_data_xfer_sizep, size_t *pgsizep)
 {
     struct json_object *jo_caps = NULL;
     struct json_object *jo_top = NULL;
@@ -508,6 +512,18 @@ tran_parse_version_json(const char *json_str,
         }
     }
 
+    if (json_object_object_get_ex(jo_caps, "max_data_xfer_size", &jo)) {
+        if (json_object_get_type(jo) != json_type_int) {
+            goto out;
+        }
+
+        errno = 0;
+        *client_max_data_xfer_sizep = (int)json_object_get_int64(jo);
+
+        if (errno != 0) {
+            goto out;
+        }
+    }
     if (json_object_object_get_ex(jo_caps, "migration", &jo)) {
         struct json_object *jo2 = NULL;
 
@@ -581,6 +597,7 @@ recv_version(vfu_ctx_t *vfu_ctx, int sock, uint16_t *msg_idp,
     }
 
     vfu_ctx->client_max_fds = 1;
+    vfu_ctx->client_max_data_xfer_size = VFIO_USER_DEFAULT_MAX_DATA_XFER_SIZE;
 
     if (vlen > sizeof(*cversion)) {
         const char *json_str = (const char *)cversion->data;
@@ -594,6 +611,7 @@ recv_version(vfu_ctx_t *vfu_ctx, int sock, uint16_t *msg_idp,
         }
 
         ret = tran_parse_version_json(json_str, &vfu_ctx->client_max_fds,
+                                      &vfu_ctx->client_max_data_xfer_size,
                                       &pgsize);
 
         if (ret < 0) {
@@ -656,20 +674,20 @@ send_version(vfu_ctx_t *vfu_ctx, int sock, uint16_t msg_id,
             "{"
                 "\"capabilities\":{"
                     "\"max_msg_fds\":%u,"
-                    "\"max_msg_size\":%u"
+                    "\"max_data_xfer_size\":%u"
                 "}"
-            "}", SERVER_MAX_FDS, SERVER_MAX_MSG_SIZE);
+            "}", SERVER_MAX_FDS, SERVER_MAX_DATA_XFER_SIZE);
     } else {
         slen = snprintf(server_caps, sizeof(server_caps),
             "{"
                 "\"capabilities\":{"
                     "\"max_msg_fds\":%u,"
-                    "\"max_msg_size\":%u,"
+                    "\"max_data_xfer_size\":%u,"
                     "\"migration\":{"
                         "\"pgsize\":%zu"
                     "}"
                 "}"
-            "}", SERVER_MAX_FDS, SERVER_MAX_MSG_SIZE,
+            "}", SERVER_MAX_FDS, SERVER_MAX_DATA_XFER_SIZE,
             migration_get_pgsize(vfu_ctx->migration));
     }
 
@@ -787,6 +805,8 @@ tran_sock_recv_body(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg)
 
     ts = vfu_ctx->tran_data;
 
+    assert(msg->in_size <= SERVER_MAX_MSG_SIZE);
+
     msg->in_data = malloc(msg->in_size);
 
     if (msg->in_data == NULL) {
diff --git a/lib/tran_sock.h b/lib/tran_sock.h
index dbf54b1..efc5d60 100644
--- a/lib/tran_sock.h
+++ b/lib/tran_sock.h
@@ -54,7 +54,7 @@ extern struct transport_ops tran_sock_ops;
 */
 int
 tran_parse_version_json(const char *json_str, int *client_max_fdsp,
-                        size_t *pgsizep);
+                        size_t *client_max_data_xfer_sizep, size_t *pgsizep);
 
 /*
  * Send a message to the other end. The iovecs array should leave the first
diff --git a/samples/client.c b/samples/client.c
index 12eee21..13880c5 100644
--- a/samples/client.c
+++ b/samples/client.c
@@ -51,6 +51,9 @@
 
 #define CLIENT_MAX_FDS (32)
 
+/* This is low, so we get testing of vfu_dma_read/write() chunking. */
+#define CLIENT_MAX_DATA_XFER_SIZE (1024)
+
 static char *irq_to_str[] = {
     [VFU_DEV_INTX_IRQ] = "INTx",
     [VFU_DEV_MSI_IRQ] = "MSI",
@@ -105,11 +108,12 @@ send_version(int sock)
         "{"
             "\"capabilities\":{"
                 "\"max_msg_fds\":%u,"
+                "\"max_data_xfer_size\":%u,"
                 "\"migration\":{"
                     "\"pgsize\":%zu"
                 "}"
            "}"
-        "}", CLIENT_MAX_FDS, sysconf(_SC_PAGESIZE));
+        "}", CLIENT_MAX_FDS, CLIENT_MAX_DATA_XFER_SIZE, sysconf(_SC_PAGESIZE));
 
     cversion.major = LIB_VFIO_USER_MAJOR;
     cversion.minor = LIB_VFIO_USER_MINOR;
@@ -130,7 +134,8 @@ send_version(int sock)
 }
 
 static void
-recv_version(int sock, int *server_max_fds, size_t *pgsize)
+recv_version(int sock, int *server_max_fds, size_t *server_max_data_xfer_size,
+             size_t *pgsize)
 {
     struct vfio_user_version *sversion = NULL;
     struct vfio_user_header hdr;
@@ -167,6 +172,7 @@ recv_version(int sock, int *server_max_fds, size_t *pgsize)
     }
 
     *server_max_fds = 1;
+    *server_max_data_xfer_size = VFIO_USER_DEFAULT_MAX_DATA_XFER_SIZE;
     *pgsize = sysconf(_SC_PAGESIZE);
 
     if (vlen > sizeof(*sversion)) {
@@ -177,7 +183,8 @@ recv_version(int sock, int *server_max_fds, size_t *pgsize)
             errx(EXIT_FAILURE, "ignoring invalid JSON from server");
         }
 
-        ret = tran_parse_version_json(json_str, server_max_fds, pgsize);
+        ret = tran_parse_version_json(json_str, server_max_fds,
+                                      server_max_data_xfer_size, pgsize);
 
         if (ret < 0) {
             err(EXIT_FAILURE, "failed to parse server JSON \"%s\"", json_str);
@@ -188,10 +195,11 @@ recv_version(int sock, int *server_max_fds, size_t *pgsize)
 }
 
 static void
-negotiate(int sock, int *server_max_fds, size_t *pgsize)
+negotiate(int sock, int *server_max_fds, size_t *server_max_data_xfer_size,
+          size_t *pgsize)
 {
     send_version(sock);
-    recv_version(sock, server_max_fds, pgsize);
+    recv_version(sock, server_max_fds, server_max_data_xfer_size, pgsize);
 }
 
 static void
@@ -562,21 +570,27 @@ handle_dma_write(int sock, struct vfio_user_dma_map *dma_regions,
     }
 
     for (i = 0; i < nr_dma_regions; i++) {
-        if (dma_regions[i].addr == dma_access.addr) {
-            ret = pwrite(dma_region_fds[i], data, dma_access.count,
-                         dma_regions[i].offset);
-            if (ret < 0) {
-                err(EXIT_FAILURE,
-                    "failed to write data to fd=%d at %#lx-%#lx",
-                    dma_region_fds[i],
-                    dma_regions[i].offset,
-                    dma_regions[i].offset + dma_access.count - 1);
-            }
-            break;
-        }
+        off_t offset;
+        ssize_t c;
+
+        if (dma_access.addr < dma_regions[i].addr ||
+            dma_access.addr >= dma_regions[i].addr + dma_regions[i].size) {
+            continue;
+        }
+
+        offset = dma_regions[i].offset + dma_access.addr;
+
+        c = pwrite(dma_region_fds[i], data, dma_access.count, offset);
+
+        if (c != (ssize_t)dma_access.count) {
+            err(EXIT_FAILURE, "failed to write to fd=%d at [%#lx-%#lx)",
+                dma_region_fds[i], offset, offset + dma_access.count);
+        }
+        break;
     }
-    dma_access.count = 0;
+
+    assert(i != nr_dma_regions);
+
     ret = tran_sock_send(sock, msg_id, true, VFIO_USER_DMA_WRITE,
                          &dma_access, sizeof(dma_access));
     if (ret < 0) {
@@ -606,24 +620,36 @@ handle_dma_read(int sock, struct vfio_user_dma_map *dma_regions,
     if (response == NULL) {
         err(EXIT_FAILURE, NULL);
     }
+
+    response->addr = dma_access.addr;
     response->count = dma_access.count;
     data = (char *)response->data;
 
     for (i = 0; i < nr_dma_regions; i++) {
-        if (dma_regions[i].addr == dma_access.addr) {
-            if (pread(dma_region_fds[i], data, dma_access.count, dma_regions[i].offset) == -1) {
-                err(EXIT_FAILURE, "failed to write data at %#lx-%#lx",
-                    dma_regions[i].offset,
-                    dma_regions[i].offset + dma_access.count);
-            }
-            break;
-        }
+        off_t offset;
+        ssize_t c;
+
+        if (dma_access.addr < dma_regions[i].addr ||
+            dma_access.addr >= dma_regions[i].addr + dma_regions[i].size) {
+            continue;
+        }
+
+        offset = dma_regions[i].offset + dma_access.addr;
+
+        c = pread(dma_region_fds[i], data, dma_access.count, offset);
+
+        if (c != (ssize_t)dma_access.count) {
+            err(EXIT_FAILURE, "failed to read from fd=%d at [%#lx-%#lx)",
+                dma_region_fds[i], offset, offset + dma_access.count);
+        }
+        break;
     }
 
+    assert(i != nr_dma_regions);
+
     ret = tran_sock_send(sock, msg_id, true, VFIO_USER_DMA_READ,
                          response, response_sz);
     if (ret < 0) {
-        err(EXIT_FAILURE, "failed to send reply of DMA write");
+        err(EXIT_FAILURE, "failed to send reply of DMA read");
     }
     free(response);
 }
@@ -632,8 +658,14 @@ static void
 handle_dma_io(int sock, struct vfio_user_dma_map *dma_regions,
               int nr_dma_regions, int *dma_region_fds)
 {
-    handle_dma_write(sock, dma_regions, nr_dma_regions, dma_region_fds);
-    handle_dma_read(sock, dma_regions, nr_dma_regions, dma_region_fds);
+    size_t i;
+
+    for (i = 0; i < 4096 / CLIENT_MAX_DATA_XFER_SIZE; i++) {
+        handle_dma_write(sock, dma_regions, nr_dma_regions, dma_region_fds);
+    }
+    for (i = 0; i < 4096 / CLIENT_MAX_DATA_XFER_SIZE; i++) {
+        handle_dma_read(sock, dma_regions, nr_dma_regions, dma_region_fds);
+    }
 }
 
 static void
@@ -678,12 +710,6 @@ get_dirty_bitmap(int sock, struct vfio_user_dma_map *dma_region)
     free(data);
 }
 
-enum migration {
-    NO_MIGRATION,
-    MIGRATION_SOURCE,
-    MIGRATION_DESTINATION,
-};
-
 static void
 usage(char *argv0)
 {
@@ -891,8 +917,9 @@ migrate_from(int sock, size_t *nr_iters, struct iovec **migr_iters,
 
 static int
 migrate_to(char *old_sock_path, int *server_max_fds,
-           size_t *pgsize, size_t nr_iters, struct iovec *migr_iters,
-           char *path_to_server, unsigned char *src_md5sum, size_t bar1_size)
+           size_t *server_max_data_xfer_size, size_t *pgsize, size_t nr_iters,
+           struct iovec *migr_iters, char *path_to_server,
+           unsigned char *src_md5sum, size_t bar1_size)
 {
     int ret, sock;
     char *sock_path;
@@ -947,7 +974,7 @@ migrate_to(char *old_sock_path, int *server_max_fds,
     sock = init_sock(sock_path);
     free(sock_path);
 
-    negotiate(sock, server_max_fds, pgsize);
+    negotiate(sock, server_max_fds, server_max_data_xfer_size, pgsize);
 
     /* XXX set device state to resuming */
     ret = access_region(sock, VFU_PCI_DEV_MIGR_REGION_IDX, true,
@@ -1060,6 +1087,7 @@ int main(int argc, char *argv[])
     int i;
     FILE *fp;
    int server_max_fds;
+    size_t server_max_data_xfer_size;
     size_t pgsize;
     int nr_dma_regions;
     struct vfio_user_dirty_pages dirty_pages = {0};
@@ -1095,7 +1123,7 @@ int main(int argc, char *argv[])
      *
      * Do intial negotiation with the server, and discover parameters.
      */
-    negotiate(sock, &server_max_fds, &pgsize);
+    negotiate(sock, &server_max_fds, &server_max_data_xfer_size, &pgsize);
 
     /* try to access a bogus region, we should get an error */
     ret = access_region(sock, 0xdeadbeef, false, 0, &ret, sizeof(ret));
@@ -1240,8 +1268,8 @@ int main(int argc, char *argv[])
         err(EXIT_FAILURE, "failed to asprintf");
     }
 
-    sock = migrate_to(argv[optind], &server_max_fds, &pgsize,
-                      nr_iters, migr_iters, path_to_server,
+    sock = migrate_to(argv[optind], &server_max_fds, &server_max_data_xfer_size,
+                      &pgsize, nr_iters, migr_iters, path_to_server,
                       md5sum, bar1_size);
     free(path_to_server);
     for (i = 0; i < (int)nr_iters; i++) {
diff --git a/test/py/libvfio_user.py b/test/py/libvfio_user.py
index b1ebd4b..bdb812a 100644
--- a/test/py/libvfio_user.py
+++ b/test/py/libvfio_user.py
@@ -106,10 +106,13 @@ VFIO_USER_CLIENT_MAX_FDS_LIMIT = 1024
 
 SERVER_MAX_FDS = 8
 
-SERVER_MAX_MSG_SIZE = 65536
+ONE_TB = (1024 * 1024 * 1024 * 1024)
+
+VFIO_USER_DEFAULT_MAX_DATA_XFER_SIZE = (1024 * 1024)
+SERVER_MAX_DATA_XFER_SIZE = VFIO_USER_DEFAULT_MAX_DATA_XFER_SIZE
+SERVER_MAX_MSG_SIZE = SERVER_MAX_DATA_XFER_SIZE + 16 + 16
 
 MAX_DMA_REGIONS = 16
-ONE_TB = (1024 * 1024 * 1024 * 1024)
 MAX_DMA_SIZE = (8 * ONE_TB)
 
 # enum vfio_user_command
diff --git a/test/py/test_negotiate.py b/test/py/test_negotiate.py
index e05b450..a3f2a8a 100644
--- a/test/py/test_negotiate.py
+++ b/test/py/test_negotiate.py
@@ -66,6 +66,15 @@ def test_short_write():
     vfu_attach_ctx(ctx, expect=errno.EINVAL)
     get_reply(sock, expect=errno.EINVAL)
 
+def test_long_write():
+    sock = connect_sock()
+    hdr = vfio_user_header(VFIO_USER_VERSION, size=SERVER_MAX_MSG_SIZE + 1)
+    sock.send(hdr)
+
+    ret = vfu_attach_ctx(ctx, expect=errno.EINVAL)
+    assert ret == -1
+    assert c.get_errno() == errno.EINVAL
+
 def test_bad_command():
     sock = connect_sock()
 
@@ -152,7 +161,7 @@ def test_valid_negotiate_no_json():
     assert minor == LIBVFIO_USER_MINOR
     json = parse_json(json_str)
     assert json.capabilities.max_msg_fds == SERVER_MAX_FDS
-    assert json.capabilities.max_msg_size == SERVER_MAX_MSG_SIZE
+    assert json.capabilities.max_data_xfer_size == SERVER_MAX_DATA_XFER_SIZE
     # FIXME: migration object checks
     disconnect_client(ctx, sock)
 
@@ -164,8 +173,10 @@ def test_valid_negotiate_empty_json():
     vfu_run_ctx(ctx)
 
 def test_valid_negotiate_json():
-    client_version_json(json=bytes('{ "capabilities": { "max_msg_fds": %s } }' %
-                                   VFIO_USER_CLIENT_MAX_FDS_LIMIT, "utf-8"))
+    client_version_json(json=bytes(
+        '{ "capabilities": { "max_msg_fds": %s, "max_data_xfer_size": %u } }' %
+        (VFIO_USER_CLIENT_MAX_FDS_LIMIT, VFIO_USER_DEFAULT_MAX_DATA_XFER_SIZE),
+        "utf-8"))
 
     # notice client closed connection
     vfu_run_ctx(ctx)
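A final usage note: the same negotiated bound lets the receiving side reject an
oversized count before allocating a reply buffer. The following standalone
sketch (hypothetical names, modelled loosely on the is_valid_region_access()
change above) shows that check:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical stand-in for the library's internal limit. */
    #define SERVER_MAX_DATA_XFER_SIZE (1024 * 1024)

    struct region_access {
        uint64_t offset;
        uint32_t region;
        uint32_t count;
    };

    /* Reject a client-supplied count exceeding the advertised
     * max_data_xfer_size, and guard offset+count against wraparound,
     * before any buffer is allocated. */
    static bool
    access_is_valid(const struct region_access *ra, uint64_t region_size)
    {
        if (ra->count > SERVER_MAX_DATA_XFER_SIZE) {
            fprintf(stderr, "region access count too large (%u)\n", ra->count);
            return false;
        }
        if (ra->offset > region_size || ra->count > region_size - ra->offset) {
            fprintf(stderr, "out of bounds region access\n");
            return false;
        }
        return true;
    }

    int
    main(void)
    {
        struct region_access ra = { .offset = 0, .region = 0, .count = 4096 };

        return access_is_valid(&ra, 8192) ? 0 : 1;
    }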