diff options
-rw-r--r-- | docs/vfio-user.rst | 167 | ||||
-rw-r--r-- | include/vfio-user.h | 2 | ||||
-rw-r--r-- | lib/libvfio-user.c | 145 | ||||
-rw-r--r-- | lib/private.h | 11 | ||||
-rw-r--r-- | lib/tran_sock.c | 38 | ||||
-rw-r--r-- | lib/tran_sock.h | 2 | ||||
-rw-r--r-- | samples/client.c | 110 | ||||
-rw-r--r-- | test/py/libvfio_user.py | 7 | ||||
-rw-r--r-- | test/py/test_negotiate.py | 17 |
9 files changed, 304 insertions, 195 deletions
diff --git a/docs/vfio-user.rst b/docs/vfio-user.rst index e801859..a5d2b43 100644 --- a/docs/vfio-user.rst +++ b/docs/vfio-user.rst @@ -317,9 +317,8 @@ Message sizes Some requests have an ``argsz`` field. In a request, it defines the maximum expected reply payload size, which should be at least the size of the fixed -reply payload headers defined here. For messages that don't have a reply -payload, it defines the size of the incoming request (not including the standard -header); otherwise it's not related to the request message size. +reply payload headers defined here. The *request* payload size is defined by the +usual ``msg_size`` field in the header, not the ``argsz`` field. In a reply, the server sets ``argsz`` field to the size needed for a full payload size. This may be less than the requested maximum size. This may be @@ -328,6 +327,11 @@ included in the reply, but the ``argsz`` field in the reply indicates the needed size, allowing a client to allocate a larger buffer for holding the reply before trying again. +In addition, during negotiation (see `Version`_), the client and server may +each specify a ``max_data_xfer_size`` value; this defines the maximum data that +may be read or written via one of the ``VFIO_USER_DMA/REGION_READ/WRITE`` +messages; see `Read and Write Operations`_. + Protocol Specification ====================== @@ -475,43 +479,39 @@ version data 4 variable (including terminating NUL). Optional. The version data is an optional UTF-8 encoded JSON byte array with the following format: -+--------------------+------------------+-----------------------------------+ -| Name | Type | Description | -+====================+==================+===================================+ -| ``"capabilities"`` | collection of | Contains common capabilities that | -| | name/value pairs | the sender supports. Optional. | -+--------------------+------------------+-----------------------------------+ ++--------------+--------+-----------------------------------+ +| Name | Type | Description | ++==============+========+===================================+ +| capabilities | object | Contains common capabilities that | +| | | the sender supports. Optional. | ++--------------+--------+-----------------------------------+ Capabilities: -+--------------------+------------------+-------------------------------------+ -| Name | Type | Description | -+====================+==================+=====================================+ -| ``"max_msg_fds"`` | number | Maximum number of file descriptors | -| | | that can be received by the sender | -| | | in one message. Optional. If not | -| | | specified then the receiver must | -| | | assume ``"max_msg_fds"=1``. | -+--------------------+------------------+-------------------------------------+ -| ``"max_msg_size"`` | number | Maximum message size in bytes that | -| | | the receiver can handle, including | -| | | the header. Optional. If not | -| | | specified then the receiver must | -| | | assume ``"max_msg_size"=4096``. | -+--------------------+------------------+-------------------------------------+ -| ``"migration"`` | collection of | Migration capability parameters. If | -| | name/value pairs | missing then migration is not | -| | | supported by the sender. | -+--------------------+------------------+-------------------------------------+ ++--------------------+--------+------------------------------------------------+ +| Name | Type | Description | ++====================+========+================================================+ +| max_msg_fds | number | Maximum number of file descriptors that can be | +| | | received by the sender in one message. | +| | | Optional. If not specified then the receiver | +| | | must assume a value of ``1``. | ++--------------------+--------+------------------------------------------------+ +| max_data_xfer_size | number | Maximum ``count`` for data transfer messages; | +| | | see `Read and Write Operations`_. Optional, | +| | | with a default value of 1048576 bytes. | ++--------------------+--------+------------------------------------------------+ +| migration | object | Migration capability parameters. If missing | +| | | then migration is not supported by the sender. | ++--------------------+--------+------------------------------------------------+ The migration capability contains the following name/value pairs: -+--------------+--------+-----------------------------------------------+ -| Name | Type | Description | -+==============+========+===============================================+ -| ``"pgsize"`` | number | Page size of dirty pages bitmap. The smallest | -| | | between the client and the server is used. | -+--------------+--------+-----------------------------------------------+ ++--------+--------+-----------------------------------------------+ +| Name | Type | Description | ++========+========+===============================================+ +| pgsize | number | Page size of dirty pages bitmap. The smallest | +| | | between the client and the server is used. | ++--------+--------+-----------------------------------------------+ Reply ^^^^^ @@ -1302,6 +1302,9 @@ There is no payload in the reply. Note that all of these operations must be supported by the client and/or server, even if the corresponding memory or device region has been shared as mappable. +The ``count`` field must not exceed the value of ``max_data_xfer_size`` of the +peer, for both reads and writes. + ``VFIO_USER_REGION_READ`` ------------------------- @@ -1315,16 +1318,16 @@ Request +--------+--------+----------+ | Name | Offset | Size | +========+========+==========+ -| Offset | 0 | 8 | +| offset | 0 | 8 | +--------+--------+----------+ -| Region | 8 | 4 | +| region | 8 | 4 | +--------+--------+----------+ -| Count | 12 | 4 | +| count | 12 | 4 | +--------+--------+----------+ -* *Offset* into the region being accessed. -* *Region* is the index of the region being accessed. -* *Count* is the size of the data to be transferred. +* *offset* into the region being accessed. +* *region* is the index of the region being accessed. +* *count* is the size of the data to be transferred. Reply ^^^^^ @@ -1332,19 +1335,19 @@ Reply +--------+--------+----------+ | Name | Offset | Size | +========+========+==========+ -| Offset | 0 | 8 | +| offset | 0 | 8 | +--------+--------+----------+ -| Region | 8 | 4 | +| region | 8 | 4 | +--------+--------+----------+ -| Count | 12 | 4 | +| count | 12 | 4 | +--------+--------+----------+ -| Data | 16 | variable | +| data | 16 | variable | +--------+--------+----------+ -* *Offset* into the region accessed. -* *Region* is the index of the region accessed. -* *Count* is the size of the data transferred. -* *Data* is the data that was read from the device region. +* *offset* into the region accessed. +* *region* is the index of the region accessed. +* *count* is the size of the data transferred. +* *data* is the data that was read from the device region. ``VFIO_USER_REGION_WRITE`` -------------------------- @@ -1359,19 +1362,19 @@ Request +--------+--------+----------+ | Name | Offset | Size | +========+========+==========+ -| Offset | 0 | 8 | +| offset | 0 | 8 | +--------+--------+----------+ -| Region | 8 | 4 | +| region | 8 | 4 | +--------+--------+----------+ -| Count | 12 | 4 | +| count | 12 | 4 | +--------+--------+----------+ -| Data | 16 | variable | +| data | 16 | variable | +--------+--------+----------+ -* *Offset* into the region being accessed. -* *Region* is the index of the region being accessed. -* *Count* is the size of the data to be transferred. -* *Data* is the data to write +* *offset* into the region being accessed. +* *region* is the index of the region being accessed. +* *count* is the size of the data to be transferred. +* *data* is the data to write Reply ^^^^^ @@ -1379,16 +1382,16 @@ Reply +--------+--------+----------+ | Name | Offset | Size | +========+========+==========+ -| Offset | 0 | 8 | +| offset | 0 | 8 | +--------+--------+----------+ -| Region | 8 | 4 | +| region | 8 | 4 | +--------+--------+----------+ -| Count | 12 | 4 | +| count | 12 | 4 | +--------+--------+----------+ -* *Offset* into the region accessed. -* *Region* is the index of the region accessed. -* *Count* is the size of the data transferred. +* *offset* into the region accessed. +* *region* is the index of the region accessed. +* *count* is the size of the data transferred. ``VFIO_USER_DMA_READ`` ----------------------- @@ -1402,14 +1405,14 @@ Request +---------+--------+----------+ | Name | Offset | Size | +=========+========+==========+ -| Address | 0 | 8 | +| address | 0 | 8 | +---------+--------+----------+ -| Count | 8 | 8 | +| count | 8 | 8 | +---------+--------+----------+ -* *Address* is the client DMA memory address being accessed. This address must have +* *address* is the client DMA memory address being accessed. This address must have been previously exported to the server with a ``VFIO_USER_DMA_MAP`` message. -* *Count* is the size of the data to be transferred. +* *count* is the size of the data to be transferred. Reply ^^^^^ @@ -1417,16 +1420,16 @@ Reply +---------+--------+----------+ | Name | Offset | Size | +=========+========+==========+ -| Address | 0 | 8 | +| address | 0 | 8 | +---------+--------+----------+ -| Count | 8 | 8 | +| count | 8 | 8 | +---------+--------+----------+ -| Data | 16 | variable | +| data | 16 | variable | +---------+--------+----------+ -* *Address* is the client DMA memory address being accessed. -* *Count* is the size of the data transferred. -* *Data* is the data read. +* *address* is the client DMA memory address being accessed. +* *count* is the size of the data transferred. +* *data* is the data read. ``VFIO_USER_DMA_WRITE`` ----------------------- @@ -1440,17 +1443,17 @@ Request +---------+--------+----------+ | Name | Offset | Size | +=========+========+==========+ -| Address | 0 | 8 | +| address | 0 | 8 | +---------+--------+----------+ -| Count | 8 | 8 | +| count | 8 | 8 | +---------+--------+----------+ -| Data | 16 | variable | +| data | 16 | variable | +---------+--------+----------+ -* *Address* is the client DMA memory address being accessed. This address must have +* *address* is the client DMA memory address being accessed. This address must have been previously exported to the server with a ``VFIO_USER_DMA_MAP`` message. -* *Count* is the size of the data to be transferred. -* *Data* is the data to write +* *count* is the size of the data to be transferred. +* *data* is the data to write Reply ^^^^^ @@ -1458,13 +1461,13 @@ Reply +---------+--------+----------+ | Name | Offset | Size | +=========+========+==========+ -| Address | 0 | 8 | +| address | 0 | 8 | +---------+--------+----------+ -| Count | 8 | 4 | +| count | 8 | 4 | +---------+--------+----------+ -* *Address* is the client DMA memory address being accessed. -* *Count* is the size of the data transferred. +* *address* is the client DMA memory address being accessed. +* *count* is the size of the data transferred. ``VFIO_USER_DEVICE_RESET`` -------------------------- diff --git a/include/vfio-user.h b/include/vfio-user.h index 7be7cf7..43f6877 100644 --- a/include/vfio-user.h +++ b/include/vfio-user.h @@ -50,6 +50,8 @@ extern "C" { #endif +#define VFIO_USER_DEFAULT_MAX_DATA_XFER_SIZE (1024 * 1024) + enum vfio_user_command { VFIO_USER_VERSION = 1, VFIO_USER_DMA_MAP = 2, diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c index 452b15f..62f906a 100644 --- a/lib/libvfio-user.c +++ b/lib/libvfio-user.c @@ -252,6 +252,12 @@ is_valid_region_access(vfu_ctx_t *vfu_ctx, size_t size, uint16_t cmd, return false; } + if (ra->count > SERVER_MAX_DATA_XFER_SIZE) { + vfu_log(vfu_ctx, LOG_ERR, "region access count too large (%u)", + ra->count); + return false; + } + if (cmd == VFIO_USER_REGION_WRITE && size - sizeof(*ra) != ra->count) { vfu_log(vfu_ctx, LOG_ERR, "region write count too small: " "expected %lu, got %u", size - sizeof(*ra), ra->count); @@ -265,7 +271,6 @@ is_valid_region_access(vfu_ctx_t *vfu_ctx, size_t size, uint16_t cmd, return false; } - // FIXME: ->count unbounded (alloc), except for region size // FIXME: need to audit later for wraparound if (ra->offset + ra->count > vfu_ctx->reg_info[index].size) { vfu_log(vfu_ctx, LOG_ERR, "out of bounds region access %#lx-%#lx " @@ -891,7 +896,7 @@ is_valid_header(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) } if (msg->hdr.msg_size < sizeof(msg->hdr)) { - vfu_log(vfu_ctx, LOG_ERR, "msg%#hx: bad size %d in header", + vfu_log(vfu_ctx, LOG_ERR, "msg%#hx: bad size %u in header", msg->hdr.msg_id, msg->hdr.msg_size); return false; } else if (msg->hdr.msg_size == sizeof(msg->hdr) && @@ -900,6 +905,11 @@ is_valid_header(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) msg->hdr.msg_id, msg->hdr.cmd); return false; } else if (msg->hdr.msg_size > SERVER_MAX_MSG_SIZE) { + /* + * We know we can reject this: all normal requests shouldn't need this + * amount of space, including VFIO_USER_REGION_WRITE, which should be + * bound by max_data_xfer_size. + */ vfu_log(vfu_ctx, LOG_ERR, "msg%#hx: size of %u is too large", msg->hdr.msg_id, msg->hdr.msg_size); return false; @@ -1603,82 +1613,105 @@ vfu_unmap_sg(vfu_ctx_t *vfu_ctx, const dma_sg_t *sg, struct iovec *iov, int cnt) return dma_unmap_sg(vfu_ctx->dma, sg, iov, cnt); } -EXPORT int -vfu_dma_read(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data) +static int +vfu_dma_transfer(vfu_ctx_t *vfu_ctx, enum vfio_user_command cmd, + dma_sg_t *sg, void *data) { - struct vfio_user_dma_region_access *dma_recv; - struct vfio_user_dma_region_access dma_send; - uint64_t recv_size; - int msg_id = 1, ret; + struct vfio_user_dma_region_access *dma_reply; + struct vfio_user_dma_region_access *dma_req; + struct vfio_user_dma_region_access dma; + static int msg_id = 1; + size_t remaining; + size_t count; + size_t rlen; + void *rbuf; assert(vfu_ctx != NULL); assert(sg != NULL); - recv_size = sizeof(*dma_recv) + sg->length; + rlen = sizeof(struct vfio_user_dma_region_access) + + MIN(sg->length, vfu_ctx->client_max_data_xfer_size); + + rbuf = calloc(1, rlen); - dma_recv = calloc(recv_size, 1); - if (dma_recv == NULL) { + if (rbuf == NULL) { return -1; } - dma_send.addr = (uint64_t)sg->dma_addr; - dma_send.count = sg->length; - ret = vfu_ctx->tran->send_msg(vfu_ctx, msg_id, VFIO_USER_DMA_READ, - &dma_send, sizeof(dma_send), NULL, - dma_recv, recv_size); + remaining = sg->length; + count = 0; - if (ret < 0) { - if (errno == ENOMSG) { - vfu_reset_ctx(vfu_ctx, "closed"); - errno = ENOTCONN; - } else if (errno == ECONNRESET) { - vfu_reset_ctx(vfu_ctx, "reset"); - errno = ENOTCONN; - } + if (cmd == VFIO_USER_DMA_READ) { + dma_req = &dma; + dma_reply = rbuf; } else { - /* FIXME no need for memcpy */ - memcpy(data, dma_recv->data, sg->length); + dma_req = rbuf; + dma_reply = &dma; } - free(dma_recv); + while (remaining > 0) { + int ret; - return ret; -} + dma_req->addr = (uint64_t)sg->dma_addr + count; + dma_req->count = MIN(remaining, vfu_ctx->client_max_data_xfer_size); -EXPORT int -vfu_dma_write(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data) -{ - struct vfio_user_dma_region_access *dma_send, dma_recv; - uint64_t send_size = sizeof(*dma_send) + sg->length; - int msg_id = 1, ret; + if (cmd == VFIO_USER_DMA_WRITE) { + memcpy(rbuf + sizeof(*dma_req), data + count, dma_req->count); - assert(vfu_ctx != NULL); - assert(sg != NULL); + ret = vfu_ctx->tran->send_msg(vfu_ctx, msg_id++, VFIO_USER_DMA_WRITE, + rbuf, rlen, NULL, + dma_reply, sizeof(*dma_reply)); + } else { + ret = vfu_ctx->tran->send_msg(vfu_ctx, msg_id++, VFIO_USER_DMA_READ, + dma_req, sizeof(*dma_req), NULL, + rbuf, rlen); + } - dma_send = calloc(send_size, 1); - if (dma_send == NULL) { - return -1; - } - dma_send->addr = (uint64_t)sg->dma_addr; - dma_send->count = sg->length; - memcpy(dma_send->data, data, sg->length); /* FIXME no need to copy! */ - ret = vfu_ctx->tran->send_msg(vfu_ctx, msg_id, VFIO_USER_DMA_WRITE, - dma_send, send_size, NULL, - &dma_recv, sizeof(dma_recv)); + if (ret < 0) { + ret = errno; + if (ret == ENOMSG) { + vfu_reset_ctx(vfu_ctx, "closed"); + ret = ENOTCONN; + } else if (errno == ECONNRESET) { + vfu_reset_ctx(vfu_ctx, "reset"); + ret = ENOTCONN; + } + free(rbuf); + return ERROR_INT(ret); + } - if (ret < 0) { - if (errno == ENOMSG) { - vfu_reset_ctx(vfu_ctx, "closed"); - errno = ENOTCONN; - } else if (errno == ECONNRESET) { - vfu_reset_ctx(vfu_ctx, "reset"); - errno = ENOTCONN; + if (dma_reply->addr != dma_req->addr || + dma_reply->count != dma_req->count) { + vfu_log(vfu_ctx, LOG_ERR, "bad reply to DMA transfer: " + "request:%#lx,%lu reply:%#lx,%lu", + dma_req->addr, dma_req->count, + dma_reply->addr, dma_reply->count); + free(rbuf); + return ERROR_INT(EINVAL); } + + if (cmd == VFIO_USER_DMA_READ) { + memcpy(data + count, rbuf + sizeof(*dma_reply), dma_req->count); + } + + count += dma_req->count; + remaining -= dma_req->count; } - free(dma_send); + free(rbuf); + return 0; +} - return ret; +EXPORT int +vfu_dma_read(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data) +{ + return vfu_dma_transfer(vfu_ctx, VFIO_USER_DMA_READ, sg, data); +} + +EXPORT int +vfu_dma_write(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data) +{ + return vfu_dma_transfer(vfu_ctx, VFIO_USER_DMA_WRITE, sg, data); } /* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lib/private.h b/lib/private.h index c7c0627..f474e1f 100644 --- a/lib/private.h +++ b/lib/private.h @@ -37,6 +37,7 @@ #include "common.h" #include "pci_caps.h" +#include "vfio-user.h" /* * The main reason we limit the size of an individual DMA region from the client @@ -46,7 +47,14 @@ #define MAX_DMA_SIZE (8 * ONE_TB) #define MAX_DMA_REGIONS 16 -#define SERVER_MAX_MSG_SIZE 65536 +#define SERVER_MAX_DATA_XFER_SIZE (VFIO_USER_DEFAULT_MAX_DATA_XFER_SIZE) + +/* + * Enough to receive a VFIO_USER_REGION_WRITE of SERVER_MAX_DATA_XFER_SIZE. + */ +#define SERVER_MAX_MSG_SIZE (SERVER_MAX_DATA_XFER_SIZE + \ + sizeof(struct vfio_user_header) + \ + sizeof(struct vfio_user_region_access)) /* * Structure used to hold an in-flight request+reply. @@ -155,6 +163,7 @@ struct vfu_ctx { vfu_dma_unregister_cb_t *dma_unregister; int client_max_fds; + size_t client_max_data_xfer_size; struct migration *migration; diff --git a/lib/tran_sock.c b/lib/tran_sock.c index 12b3321..f8b7004 100644 --- a/lib/tran_sock.c +++ b/lib/tran_sock.c @@ -63,7 +63,7 @@ tran_sock_send_iovec(int sock, uint16_t msg_id, bool is_reply, int *fds, int count, int err) { int ret; - struct vfio_user_header hdr = {.msg_id = msg_id}; + struct vfio_user_header hdr = { .msg_id = msg_id }; struct msghdr msg; size_t i; size_t size = count * sizeof(*fds); @@ -250,6 +250,10 @@ tran_sock_recv_fds(int sock, struct vfio_user_header *hdr, bool is_reply, } } + if (hdr->msg_size < sizeof(*hdr) || hdr->msg_size > SERVER_MAX_MSG_SIZE) { + return ERROR_INT(EINVAL); + } + if (len != NULL && *len > 0 && hdr->msg_size > sizeof(*hdr)) { ret = recv(sock, data, MIN(hdr->msg_size - sizeof(*hdr), *len), MSG_WAITALL); @@ -276,8 +280,6 @@ tran_sock_recv(int sock, struct vfio_user_header *hdr, bool is_reply, /* * Like tran_sock_recv(), but will automatically allocate reply data. - * - * FIXME: this does an unconstrained alloc of client-supplied data. */ int tran_sock_recv_alloc(int sock, struct vfio_user_header *hdr, bool is_reply, @@ -294,6 +296,7 @@ tran_sock_recv_alloc(int sock, struct vfio_user_header *hdr, bool is_reply, } assert(hdr->msg_size >= sizeof(*hdr)); + assert(hdr->msg_size <= SERVER_MAX_MSG_SIZE); len = hdr->msg_size - sizeof(*hdr); @@ -464,6 +467,7 @@ tran_sock_get_poll_fd(vfu_ctx_t *vfu_ctx) * { * "capabilities": { * "max_msg_fds": 32, + * "max_data_xfer_size": 1048576 * "migration": { * "pgsize": 4096 * } @@ -474,8 +478,8 @@ tran_sock_get_poll_fd(vfu_ctx_t *vfu_ctx) * available in newer library versions, so we don't use it. */ int -tran_parse_version_json(const char *json_str, - int *client_max_fdsp, size_t *pgsizep) +tran_parse_version_json(const char *json_str, int *client_max_fdsp, + size_t *client_max_data_xfer_sizep, size_t *pgsizep) { struct json_object *jo_caps = NULL; struct json_object *jo_top = NULL; @@ -508,6 +512,18 @@ tran_parse_version_json(const char *json_str, } } + if (json_object_object_get_ex(jo_caps, "max_data_xfer_size", &jo)) { + if (json_object_get_type(jo) != json_type_int) { + goto out; + } + + errno = 0; + *client_max_data_xfer_sizep = (int)json_object_get_int64(jo); + + if (errno != 0) { + goto out; + } + } if (json_object_object_get_ex(jo_caps, "migration", &jo)) { struct json_object *jo2 = NULL; @@ -581,6 +597,7 @@ recv_version(vfu_ctx_t *vfu_ctx, int sock, uint16_t *msg_idp, } vfu_ctx->client_max_fds = 1; + vfu_ctx->client_max_data_xfer_size = VFIO_USER_DEFAULT_MAX_DATA_XFER_SIZE; if (vlen > sizeof(*cversion)) { const char *json_str = (const char *)cversion->data; @@ -594,6 +611,7 @@ recv_version(vfu_ctx_t *vfu_ctx, int sock, uint16_t *msg_idp, } ret = tran_parse_version_json(json_str, &vfu_ctx->client_max_fds, + &vfu_ctx->client_max_data_xfer_size, &pgsize); if (ret < 0) { @@ -656,20 +674,20 @@ send_version(vfu_ctx_t *vfu_ctx, int sock, uint16_t msg_id, "{" "\"capabilities\":{" "\"max_msg_fds\":%u," - "\"max_msg_size\":%u" + "\"max_data_xfer_size\":%u" "}" - "}", SERVER_MAX_FDS, SERVER_MAX_MSG_SIZE); + "}", SERVER_MAX_FDS, SERVER_MAX_DATA_XFER_SIZE); } else { slen = snprintf(server_caps, sizeof(server_caps), "{" "\"capabilities\":{" "\"max_msg_fds\":%u," - "\"max_msg_size\":%u," + "\"max_data_xfer_size\":%u," "\"migration\":{" "\"pgsize\":%zu" "}" "}" - "}", SERVER_MAX_FDS, SERVER_MAX_MSG_SIZE, + "}", SERVER_MAX_FDS, SERVER_MAX_DATA_XFER_SIZE, migration_get_pgsize(vfu_ctx->migration)); } @@ -787,6 +805,8 @@ tran_sock_recv_body(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) ts = vfu_ctx->tran_data; + assert(msg->in_size <= SERVER_MAX_MSG_SIZE); + msg->in_data = malloc(msg->in_size); if (msg->in_data == NULL) { diff --git a/lib/tran_sock.h b/lib/tran_sock.h index dbf54b1..efc5d60 100644 --- a/lib/tran_sock.h +++ b/lib/tran_sock.h @@ -54,7 +54,7 @@ extern struct transport_ops tran_sock_ops; */ int tran_parse_version_json(const char *json_str, int *client_max_fdsp, - size_t *pgsizep); + size_t *client_max_data_xfer_sizep, size_t *pgsizep); /* * Send a message to the other end. The iovecs array should leave the first diff --git a/samples/client.c b/samples/client.c index 12eee21..13880c5 100644 --- a/samples/client.c +++ b/samples/client.c @@ -51,6 +51,9 @@ #define CLIENT_MAX_FDS (32) +/* This is low, so we get testing of vfu_dma_read/write() chunking. */ +#define CLIENT_MAX_DATA_XFER_SIZE (1024) + static char *irq_to_str[] = { [VFU_DEV_INTX_IRQ] = "INTx", [VFU_DEV_MSI_IRQ] = "MSI", @@ -105,11 +108,12 @@ send_version(int sock) "{" "\"capabilities\":{" "\"max_msg_fds\":%u," + "\"max_data_xfer_size\":%u," "\"migration\":{" "\"pgsize\":%zu" "}" "}" - "}", CLIENT_MAX_FDS, sysconf(_SC_PAGESIZE)); + "}", CLIENT_MAX_FDS, CLIENT_MAX_DATA_XFER_SIZE, sysconf(_SC_PAGESIZE)); cversion.major = LIB_VFIO_USER_MAJOR; cversion.minor = LIB_VFIO_USER_MINOR; @@ -130,7 +134,8 @@ send_version(int sock) } static void -recv_version(int sock, int *server_max_fds, size_t *pgsize) +recv_version(int sock, int *server_max_fds, size_t *server_max_data_xfer_size, + size_t *pgsize) { struct vfio_user_version *sversion = NULL; struct vfio_user_header hdr; @@ -167,6 +172,7 @@ recv_version(int sock, int *server_max_fds, size_t *pgsize) } *server_max_fds = 1; + *server_max_data_xfer_size = VFIO_USER_DEFAULT_MAX_DATA_XFER_SIZE; *pgsize = sysconf(_SC_PAGESIZE); if (vlen > sizeof(*sversion)) { @@ -177,7 +183,8 @@ recv_version(int sock, int *server_max_fds, size_t *pgsize) errx(EXIT_FAILURE, "ignoring invalid JSON from server"); } - ret = tran_parse_version_json(json_str, server_max_fds, pgsize); + ret = tran_parse_version_json(json_str, server_max_fds, + server_max_data_xfer_size, pgsize); if (ret < 0) { err(EXIT_FAILURE, "failed to parse server JSON \"%s\"", json_str); @@ -188,10 +195,11 @@ recv_version(int sock, int *server_max_fds, size_t *pgsize) } static void -negotiate(int sock, int *server_max_fds, size_t *pgsize) +negotiate(int sock, int *server_max_fds, size_t *server_max_data_xfer_size, + size_t *pgsize) { send_version(sock); - recv_version(sock, server_max_fds, pgsize); + recv_version(sock, server_max_fds, server_max_data_xfer_size, pgsize); } static void @@ -562,21 +570,27 @@ handle_dma_write(int sock, struct vfio_user_dma_map *dma_regions, } for (i = 0; i < nr_dma_regions; i++) { - if (dma_regions[i].addr == dma_access.addr) { - ret = pwrite(dma_region_fds[i], data, dma_access.count, - dma_regions[i].offset); - if (ret < 0) { - err(EXIT_FAILURE, - "failed to write data to fd=%d at %#lx-%#lx", - dma_region_fds[i], - dma_regions[i].offset, - dma_regions[i].offset + dma_access.count - 1); - } - break; - } + off_t offset; + ssize_t c; + + if (dma_access.addr < dma_regions[i].addr || + dma_access.addr >= dma_regions[i].addr + dma_regions[i].size) { + continue; + } + + offset = dma_regions[i].offset + dma_access.addr; + + c = pwrite(dma_region_fds[i], data, dma_access.count, offset); + + if (c != (ssize_t)dma_access.count) { + err(EXIT_FAILURE, "failed to write to fd=%d at [%#lx-%#lx)", + dma_region_fds[i], offset, offset + dma_access.count); + } + break; } - dma_access.count = 0; + assert(i != nr_dma_regions); + ret = tran_sock_send(sock, msg_id, true, VFIO_USER_DMA_WRITE, &dma_access, sizeof(dma_access)); if (ret < 0) { @@ -606,24 +620,36 @@ handle_dma_read(int sock, struct vfio_user_dma_map *dma_regions, if (response == NULL) { err(EXIT_FAILURE, NULL); } + response->addr = dma_access.addr; response->count = dma_access.count; data = (char *)response->data; for (i = 0; i < nr_dma_regions; i++) { - if (dma_regions[i].addr == dma_access.addr) { - if (pread(dma_region_fds[i], data, dma_access.count, dma_regions[i].offset) == -1) { - err(EXIT_FAILURE, "failed to write data at %#lx-%#lx", - dma_regions[i].offset, - dma_regions[i].offset + dma_access.count); - } - break; - } + off_t offset; + ssize_t c; + + if (dma_access.addr < dma_regions[i].addr || + dma_access.addr >= dma_regions[i].addr + dma_regions[i].size) { + continue; + } + + offset = dma_regions[i].offset + dma_access.addr; + + c = pread(dma_region_fds[i], data, dma_access.count, offset); + + if (c != (ssize_t)dma_access.count) { + err(EXIT_FAILURE, "failed to read from fd=%d at [%#lx-%#lx)", + dma_region_fds[i], offset, offset + dma_access.count); + } + break; } + assert(i != nr_dma_regions); + ret = tran_sock_send(sock, msg_id, true, VFIO_USER_DMA_READ, response, response_sz); if (ret < 0) { - err(EXIT_FAILURE, "failed to send reply of DMA write"); + err(EXIT_FAILURE, "failed to send reply of DMA read"); } free(response); } @@ -632,8 +658,14 @@ static void handle_dma_io(int sock, struct vfio_user_dma_map *dma_regions, int nr_dma_regions, int *dma_region_fds) { - handle_dma_write(sock, dma_regions, nr_dma_regions, dma_region_fds); - handle_dma_read(sock, dma_regions, nr_dma_regions, dma_region_fds); + size_t i; + + for (i = 0; i < 4096 / CLIENT_MAX_DATA_XFER_SIZE; i++) { + handle_dma_write(sock, dma_regions, nr_dma_regions, dma_region_fds); + } + for (i = 0; i < 4096 / CLIENT_MAX_DATA_XFER_SIZE; i++) { + handle_dma_read(sock, dma_regions, nr_dma_regions, dma_region_fds); + } } static void @@ -678,12 +710,6 @@ get_dirty_bitmap(int sock, struct vfio_user_dma_map *dma_region) free(data); } -enum migration { - NO_MIGRATION, - MIGRATION_SOURCE, - MIGRATION_DESTINATION, -}; - static void usage(char *argv0) { @@ -891,8 +917,9 @@ migrate_from(int sock, size_t *nr_iters, struct iovec **migr_iters, static int migrate_to(char *old_sock_path, int *server_max_fds, - size_t *pgsize, size_t nr_iters, struct iovec *migr_iters, - char *path_to_server, unsigned char *src_md5sum, size_t bar1_size) + size_t *server_max_data_xfer_size, size_t *pgsize, size_t nr_iters, + struct iovec *migr_iters, char *path_to_server, + unsigned char *src_md5sum, size_t bar1_size) { int ret, sock; char *sock_path; @@ -947,7 +974,7 @@ migrate_to(char *old_sock_path, int *server_max_fds, sock = init_sock(sock_path); free(sock_path); - negotiate(sock, server_max_fds, pgsize); + negotiate(sock, server_max_fds, server_max_data_xfer_size, pgsize); /* XXX set device state to resuming */ ret = access_region(sock, VFU_PCI_DEV_MIGR_REGION_IDX, true, @@ -1060,6 +1087,7 @@ int main(int argc, char *argv[]) int i; FILE *fp; int server_max_fds; + size_t server_max_data_xfer_size; size_t pgsize; int nr_dma_regions; struct vfio_user_dirty_pages dirty_pages = {0}; @@ -1095,7 +1123,7 @@ int main(int argc, char *argv[]) * * Do intial negotiation with the server, and discover parameters. */ - negotiate(sock, &server_max_fds, &pgsize); + negotiate(sock, &server_max_fds, &server_max_data_xfer_size, &pgsize); /* try to access a bogus region, we should get an error */ ret = access_region(sock, 0xdeadbeef, false, 0, &ret, sizeof(ret)); @@ -1240,8 +1268,8 @@ int main(int argc, char *argv[]) err(EXIT_FAILURE, "failed to asprintf"); } - sock = migrate_to(argv[optind], &server_max_fds, &pgsize, - nr_iters, migr_iters, path_to_server, + sock = migrate_to(argv[optind], &server_max_fds, &server_max_data_xfer_size, + &pgsize, nr_iters, migr_iters, path_to_server, md5sum, bar1_size); free(path_to_server); for (i = 0; i < (int)nr_iters; i++) { diff --git a/test/py/libvfio_user.py b/test/py/libvfio_user.py index b1ebd4b..bdb812a 100644 --- a/test/py/libvfio_user.py +++ b/test/py/libvfio_user.py @@ -106,10 +106,13 @@ VFIO_USER_CLIENT_MAX_FDS_LIMIT = 1024 SERVER_MAX_FDS = 8 -SERVER_MAX_MSG_SIZE = 65536 +ONE_TB = (1024 * 1024 * 1024 * 1024) + +VFIO_USER_DEFAULT_MAX_DATA_XFER_SIZE = (1024 * 1024) +SERVER_MAX_DATA_XFER_SIZE = VFIO_USER_DEFAULT_MAX_DATA_XFER_SIZE +SERVER_MAX_MSG_SIZE = SERVER_MAX_DATA_XFER_SIZE + 16 + 16 MAX_DMA_REGIONS = 16 -ONE_TB = (1024 * 1024 * 1024 * 1024) MAX_DMA_SIZE = (8 * ONE_TB) # enum vfio_user_command diff --git a/test/py/test_negotiate.py b/test/py/test_negotiate.py index e05b450..a3f2a8a 100644 --- a/test/py/test_negotiate.py +++ b/test/py/test_negotiate.py @@ -66,6 +66,15 @@ def test_short_write(): vfu_attach_ctx(ctx, expect=errno.EINVAL) get_reply(sock, expect=errno.EINVAL) +def test_long_write(): + sock = connect_sock() + hdr = vfio_user_header(VFIO_USER_VERSION, size=SERVER_MAX_MSG_SIZE + 1) + sock.send(hdr) + + ret = vfu_attach_ctx(ctx, expect=errno.EINVAL) + assert ret == -1 + assert c.get_errno() == errno.EINVAL + def test_bad_command(): sock = connect_sock() @@ -152,7 +161,7 @@ def test_valid_negotiate_no_json(): assert minor == LIBVFIO_USER_MINOR json = parse_json(json_str) assert json.capabilities.max_msg_fds == SERVER_MAX_FDS - assert json.capabilities.max_msg_size == SERVER_MAX_MSG_SIZE + assert json.capabilities.max_data_xfer_size == SERVER_MAX_DATA_XFER_SIZE # FIXME: migration object checks disconnect_client(ctx, sock) @@ -164,8 +173,10 @@ def test_valid_negotiate_empty_json(): vfu_run_ctx(ctx) def test_valid_negotiate_json(): - client_version_json(json=bytes('{ "capabilities": { "max_msg_fds": %s } }' % - VFIO_USER_CLIENT_MAX_FDS_LIMIT, "utf-8")) + client_version_json(json=bytes( + '{ "capabilities": { "max_msg_fds": %s, "max_data_xfer_size": %u } }' % + (VFIO_USER_CLIENT_MAX_FDS_LIMIT, VFIO_USER_DEFAULT_MAX_DATA_XFER_SIZE), + "utf-8")) # notice client closed connection vfu_run_ctx(ctx) |