aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThanos Makatos <thanos.makatos@nutanix.com>2020-09-24 11:58:46 -0400
committerThanos Makatos <thanos.makatos@nutanix.com>2020-09-24 12:14:17 -0400
commitd0634899d7d76872efca35bf79a0e3f31dcc8d74 (patch)
tree0effbdb6d918304b6b3a21ea2aefaee2ca0d4b42
parent6d6470ab820212059f108a458ceb9e6e98ab1ef6 (diff)
downloadlibvfio-user-d0634899d7d76872efca35bf79a0e3f31dcc8d74.zip
libvfio-user-d0634899d7d76872efca35bf79a0e3f31dcc8d74.tar.gz
libvfio-user-d0634899d7d76872efca35bf79a0e3f31dcc8d74.tar.bz2
implement VFIO_USER_DMA_MAP
Signed-off-by: Thanos Makatos <thanos.makatos@nutanix.com>
-rw-r--r--lib/libmuser.c162
-rw-r--r--lib/muser_priv.h6
-rw-r--r--samples/client.c105
-rw-r--r--samples/gpio-pci-idio-16.c13
4 files changed, 203 insertions, 83 deletions
diff --git a/lib/libmuser.c b/lib/libmuser.c
index e37c674..9defaea 100644
--- a/lib/libmuser.c
+++ b/lib/libmuser.c
@@ -60,6 +60,8 @@
#include "dma.h"
#include "cap.h"
+#define MAX_FDS 8
+
#define IOMMU_GRP_NAME "iommu_group"
typedef enum {
@@ -100,6 +102,8 @@ struct lm_ctx {
int iommu_dir_fd;
int sock_flags;
+ int client_max_fds;
+
lm_irqs_t irqs; /* XXX must be last */
};
@@ -153,7 +157,9 @@ recv_fds_kernel(lm_ctx_t *lm_ctx, void *buf, size_t size)
}
static int
-get_request_kernel(lm_ctx_t *lm_ctx, struct vfio_user_header *cmd)
+get_request_kernel(lm_ctx_t *lm_ctx, struct vfio_user_header *cmd,
+ int *fds __attribute__((unused)),
+ int *nr_fds __attribute__((unused)))
{
assert(false);
return ioctl(lm_ctx->fd, MUSER_DEV_CMD_WAIT, &cmd);
@@ -263,7 +269,9 @@ send_vfio_user_msg(int sock, uint16_t msg_id, bool is_reply,
int ret;
struct vfio_user_header hdr = {.msg_id = msg_id};
struct iovec iov[2];
- struct msghdr msg = {.msg_iovlen = 1};
+ struct msghdr msg;
+
+ memset(&msg, 0, sizeof(msg));
if (is_reply) {
hdr.flags.type = VFIO_USER_F_TYPE_REPLY;
@@ -280,9 +288,10 @@ send_vfio_user_msg(int sock, uint16_t msg_id, bool is_reply,
iov[0].iov_base = &hdr;
iov[0].iov_len = sizeof(hdr);
+ msg.msg_iovlen = 1;
if (data != NULL) {
- msg.msg_iovlen = 2;
+ msg.msg_iovlen++;
iov[1].iov_base = data;
iov[1].iov_len = len;
}
@@ -291,17 +300,16 @@ send_vfio_user_msg(int sock, uint16_t msg_id, bool is_reply,
if (fds != NULL) {
size_t size = count * sizeof *fds;
- char buf[CMSG_SPACE(size)];
+ char *buf = alloca(CMSG_SPACE(size));
msg.msg_control = buf;
- msg.msg_controllen = sizeof(buf);
+ msg.msg_controllen = CMSG_SPACE(size);
struct cmsghdr * cmsg = CMSG_FIRSTHDR(&msg);
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_RIGHTS;
cmsg->cmsg_len = CMSG_LEN(size);
memcpy(CMSG_DATA(cmsg), fds, size);
- msg.msg_controllen = CMSG_SPACE(size);
}
ret = sendmsg(sock, &msg, 0);
@@ -313,13 +321,14 @@ send_vfio_user_msg(int sock, uint16_t msg_id, bool is_reply,
}
int
-send_version(int sock, int major, int minor, uint16_t msg_id, bool is_reply)
+send_version(int sock, int major, int minor, uint16_t msg_id, bool is_reply,
+ char *caps)
{
int ret;
char *data __attribute__((__cleanup__(__free_s))) = NULL;
- ret = asprintf(&data, "{version: {\"major\": %d, \"minor\": %d}}",
- major, minor);
+ ret = asprintf(&data, "{version: {\"major\": %d, \"minor\": %d}, capabilities: %s}",
+ major, minor, caps != NULL ? caps : "{}");
if (ret == -1) {
data = NULL;
return -1;
@@ -368,7 +377,8 @@ recv_vfio_user_msg(int sock, struct vfio_user_header *hdr, bool is_reply,
}
int
-recv_version(int sock, int *major, int *minor, uint16_t *msg_id, bool is_reply)
+recv_version(int sock, int *major, int *minor, uint16_t *msg_id, bool is_reply,
+ int *max_fds)
{
int ret;
struct vfio_user_header hdr;
@@ -393,9 +403,10 @@ recv_version(int sock, int *major, int *minor, uint16_t *msg_id, bool is_reply)
}
/* FIXME use proper parsing */
- ret = sscanf(data, "{version: {\"major\": %d, \"minor\": %d}}", major,
- minor);
- if (ret != 2) {
+ ret = sscanf(data,
+ "{version: {\"major\": %d, \"minor\": %d}, capabilities: {max_fds: %d}}",
+ major, minor, max_fds);
+ if (ret != 3) {
return -EINVAL;
}
return 0;
@@ -407,28 +418,36 @@ set_version(lm_ctx_t *lm_ctx, int sock)
int ret;
int client_mj, client_mn;
uint16_t msg_id = 0;
+ char *server_caps;
+
+ ret = asprintf(&server_caps, "{max_fds: %d}", MAX_FDS);
+ if (ret == -1) {
+ return -ENOMEM;
+ }
ret = send_version(sock, LIB_MUSER_VFIO_USER_VERS_MJ,
- LIB_MUSER_VFIO_USER_VERS_MN, msg_id, false);
+ LIB_MUSER_VFIO_USER_VERS_MN, msg_id, false, server_caps);
if (ret < 0) {
lm_log(lm_ctx, LM_DBG, "failed to send version: %s", strerror(-ret));
- return ret;
+ goto out;
}
- ret = recv_version(sock, &client_mj, &client_mn, &msg_id, true);
+ ret = recv_version(sock, &client_mj, &client_mn, &msg_id, true,
+ &lm_ctx->client_max_fds);
if (ret < 0) {
lm_log(lm_ctx, LM_DBG, "failed to receive version: %s", strerror(-ret));
- return ret;
+ goto out;
}
if (client_mj != LIB_MUSER_VFIO_USER_VERS_MJ ||
client_mn != LIB_MUSER_VFIO_USER_VERS_MN) {
lm_log(lm_ctx, LM_DBG, "version mismatch, server=%d.%d, client=%d.%d",
LIB_MUSER_VFIO_USER_VERS_MJ, LIB_MUSER_VFIO_USER_VERS_MN,
client_mj, client_mn);
- return -EINVAL;
+ ret = -EINVAL;
}
-
- return 0;
+out:
+ free(server_caps);
+ return ret;
}
/**
@@ -464,19 +483,46 @@ close_sock(lm_ctx_t *lm_ctx)
}
static int
-get_request_sock(lm_ctx_t *lm_ctx, struct vfio_user_header *hdr)
+get_request_sock(lm_ctx_t *lm_ctx, struct vfio_user_header *hdr,
+ int *fds, int *nr_fds)
{
int ret;
+ struct iovec iov = {.iov_base = hdr, .iov_len = sizeof *hdr};
+ struct msghdr msg = {.msg_iov = &iov, .msg_iovlen = 1};
+ struct cmsghdr *cmsg;
+
+ msg.msg_controllen = CMSG_SPACE(sizeof(int) * *nr_fds);
+ msg.msg_control = alloca(msg.msg_controllen);
/*
* TODO ideally we should set O_NONBLOCK on the fd so that the syscall is
* faster (?). I tried that and get short reads, so we need to store the
* partially received buffer somewhere and retry.
*/
- ret = recv(lm_ctx->conn_fd, hdr, sizeof(*hdr), lm_ctx->sock_flags);
+ ret = recvmsg(lm_ctx->conn_fd, &msg, lm_ctx->sock_flags);
if (ret == -1) {
return -errno;
}
+
+ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+ if (cmsg->cmsg_level != SOL_SOCKET || cmsg->cmsg_type != SCM_RIGHTS) {
+ continue;
+ }
+ if (cmsg->cmsg_len < CMSG_LEN(sizeof(int))) {
+ return -EINVAL;
+ }
+ int size = cmsg->cmsg_len - CMSG_LEN(0);
+ if (size % sizeof(int) != 0) {
+ return -EINVAL;
+ }
+ int i;
+ *nr_fds = (int)(size / sizeof(int));
+ for (i = 0; i < *nr_fds; i++) {
+ //memcpy(fds[i], CMSG_DATA(cmsg) + sizeof(int) * i, sizeof *fds);
+ fds[i] = *(CMSG_DATA(cmsg) + sizeof(int) * i);
+ }
+ }
+
return ret;
}
@@ -520,7 +566,7 @@ static struct transport_ops {
int (*init)(lm_ctx_t*);
int (*attach)(lm_ctx_t*);
int(*detach)(lm_ctx_t*);
- int (*get_request)(lm_ctx_t*, struct vfio_user_header*);
+ int (*get_request)(lm_ctx_t*, struct vfio_user_header*, int *fds, int *nr_fds);
int (*send_response)(lm_ctx_t*, struct vfio_user_header*);
ssize_t (*recv_fds)(lm_ctx_t*, void *buf, size_t size);
} transports_ops[] = {
@@ -1599,33 +1645,62 @@ static int handle_device_get_info(lm_ctx_t *lm_ctx,
}
static int
-handle_dma_map(lm_ctx_t *lm_ctx, struct vfio_user_header *hdr)
+handle_dma_map(lm_ctx_t *lm_ctx, struct vfio_user_header *hdr,
+ int *fds, int nr_fds)
{
int ret, i;
- struct vfio_user_dma_region dma_region;
+ int nr_dma_regions;
+ struct vfio_user_dma_region *dma_regions;
assert(lm_ctx != NULL);
assert(hdr != NULL);
hdr->msg_size -= sizeof *hdr;
- if ((hdr->msg_size < sizeof(struct vfio_user_dma_region)) ||
- (hdr->msg_size % sizeof(struct vfio_user_dma_region) != 0)) {
+ if (hdr->msg_size % sizeof(struct vfio_user_dma_region) != 0) {
lm_log(lm_ctx, LM_ERR, "bad size of DMA regions %d", hdr->msg_size);
return -EINVAL;
}
- for (i = 0; i < (int)(hdr->msg_size / sizeof(struct vfio_user_dma_region));
- i++) {
- ret = recv(lm_ctx->conn_fd, &dma_region, sizeof dma_region, 0);
- if (ret == -1) {
- lm_log(lm_ctx, LM_ERR, "failed to receive DMA region: %m");
- return -errno;
- }
+ nr_dma_regions = (int)(hdr->msg_size / sizeof(struct vfio_user_dma_region));
+ if (nr_dma_regions != nr_fds) {
+ lm_log(lm_ctx, LM_ERR, "expected %d fds but got %d instead",
+ nr_dma_regions, nr_fds);
+ return -EINVAL;
+ }
+
+ dma_regions = alloca(nr_dma_regions * sizeof(*dma_regions));
+
+ ret = recv(lm_ctx->conn_fd, dma_regions, hdr->msg_size, 0);
+ if (ret == -1) {
+ lm_log(lm_ctx, LM_ERR, "failed to receive DMA region entries: %m");
+ return -errno;
+ }
- lm_log(lm_ctx, LM_DBG, "received DMA region %#lx-%#lx offset=%#lx",
- dma_region.addr, dma_region.addr + dma_region.size - 1,
- dma_region.offset);
+ if (lm_ctx->dma == NULL) {
+ return 0;
+ }
+
+ for (i = 0; i < nr_dma_regions; i++) {
+ lm_log(lm_ctx, LM_DBG, "received DMA region %#lx-%#lx offset=%#lx fd=%d",
+ dma_regions[i].addr, dma_regions[i].addr + dma_regions[i].size - 1,
+ dma_regions[i].offset, fds[i]);
+
+ ret = dma_controller_add_region(lm_ctx->dma,
+ dma_regions[i].addr,
+ dma_regions[i].size,
+ fds[i],
+ dma_regions[i].offset);
+ if (ret < 0) {
+ lm_log(lm_ctx, LM_DBG, "failed to add DMA region %#lx-%#lx offset=%#lx fd=%d: %s",
+ strerror(-ret), dma_regions[i].addr,
+ dma_regions[i].addr + dma_regions[i].size - 1,
+ dma_regions[i].offset, fds[i]);
+ return ret;
+ }
+ if (lm_ctx->map_dma != NULL) {
+ lm_ctx->map_dma(lm_ctx->pvt, dma_regions[i].addr, dma_regions[i].size);
+ }
}
return 0;
}
@@ -1640,13 +1715,20 @@ process_request(lm_ctx_t *lm_ctx)
{
struct vfio_user_header hdr = {};
int ret;
+ int *fds = NULL;
+ int nr_fds;
+
+ assert(lm_ctx != NULL);
+
+ nr_fds = lm_ctx->client_max_fds;
+ fds = alloca(nr_fds * sizeof(int));
- ret = transports_ops[lm_ctx->trans].get_request(lm_ctx, &hdr);
+ ret = transports_ops[lm_ctx->trans].get_request(lm_ctx, &hdr, fds, &nr_fds);
if (unlikely(ret < 0)) {
if (ret == -EAGAIN || ret == -EWOULDBLOCK) {
return 0;
}
- lm_log(lm_ctx, LM_ERR, "failed to receive request: %m");
+ lm_log(lm_ctx, LM_ERR, "failed to receive request: %s", strerror(-ret));
return ret;
}
if (unlikely(ret == 0)) {
@@ -1675,7 +1757,7 @@ process_request(lm_ctx_t *lm_ctx)
switch (hdr.cmd) {
case VFIO_USER_DMA_MAP:
- handle_dma_map(lm_ctx, &hdr);
+ handle_dma_map(lm_ctx, &hdr, fds, nr_fds);
break;
case VFIO_USER_DEVICE_GET_INFO:
ret = handle_device_get_info(lm_ctx, &hdr);
diff --git a/lib/muser_priv.h b/lib/muser_priv.h
index a69b25a..7a1245a 100644
--- a/lib/muser_priv.h
+++ b/lib/muser_priv.h
@@ -55,9 +55,11 @@ recv_vfio_user_msg(int sock, struct vfio_user_header *hdr, bool is_reply,
uint16_t *msg_id);
int
-send_version(int sock, int major, int minor, uint16_t msg_id, bool is_reply);
+send_version(int sock, int major, int minor, uint16_t msg_id, bool is_reply,
+ char *caps);
int
-recv_version(int sock, int *major, int *minor, uint16_t *msg_id, bool is_reply);
+recv_version(int sock, int *major, int *minor, uint16_t *msg_id, bool is_reply,
+ int *max_fds);
#endif /* MUSER_PRIV_H */
diff --git a/samples/client.c b/samples/client.c
index f44583a..d22c3a7 100644
--- a/samples/client.c
+++ b/samples/client.c
@@ -28,6 +28,7 @@
*
*/
+#define _GNU_SOURCE
#include <stdio.h>
#include <sys/socket.h>
#include <sys/un.h>
@@ -67,31 +68,43 @@ map_dma(int sock)
}
static int
-set_version(int sock)
+set_version(int sock, int client_max_fds, int *server_max_fds)
{
int ret, mj, mn;
uint16_t msg_id;
+ char *client_caps = NULL;
- ret = recv_version(sock, &mj, &mn, &msg_id, false);
+ ret = recv_version(sock, &mj, &mn, &msg_id, false, server_max_fds);
if (ret < 0) {
fprintf(stderr, "failed to receive version from server: %s\n",
strerror(-ret));
- return ret;
+ goto out;
}
if (mj != LIB_MUSER_VFIO_USER_VERS_MJ || mn != LIB_MUSER_VFIO_USER_VERS_MN) {
fprintf(stderr, "bad server version %d.%d\n", mj, mn);
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = asprintf(&client_caps, "{max_fds: %d}", client_max_fds);
+ if (ret == -1) {
+ client_caps = NULL;
+ ret = -ENOMEM; /* FIXME */
+ goto out;
}
- ret = send_version(sock, mj, mn, msg_id, true);
+ ret = send_version(sock, mj, mn, msg_id, true, client_caps);
if (ret < 0) {
fprintf(stderr, "failed to send version to server: %s\n",
strerror(-ret));
- return ret;
+ goto out;
}
+ ret = 0;
- return 0;
+out:
+ free(client_caps);
+ return ret;
}
static int
@@ -140,11 +153,16 @@ int main(int argc, char *argv[])
{
int ret, sock;
- char template[] = "XXXXXX";
- struct vfio_user_dma_region dma_regions[2];
- int dma_region_fds[ARRAY_SIZE(dma_regions)];
+ struct vfio_user_dma_region *dma_regions;
+ int *dma_region_fds;
struct vfio_user_header hdr;
- uint16_t msg_id;
+ uint16_t msg_id = 1;
+ int i;
+ FILE *fp;
+ int fd;
+ const int client_max_fds = 32;
+ int server_max_fds;
+ int nr_dma_regions;
if (argc != 2) {
fprintf(stderr, "usage: %s /path/to/socket\n", argv[0]);
@@ -159,7 +177,7 @@ int main(int argc, char *argv[])
* The server proposes version upon connection, we need to send back the
* version the version we support.
*/
- if ((ret = set_version(sock)) < 0) {
+ if ((ret = set_version(sock, client_max_fds, &server_max_fds)) < 0) {
return ret;
}
@@ -168,44 +186,49 @@ int main(int argc, char *argv[])
return ret;
}
- /* Tell the server we have a memory DMA region it can access. */
- if ((dma_region_fds[0] = mkstemp(template)) == -1) {
+ /*
+ * Tell the server we have some DMA regions it can access. Each DMA regions
+ * is accompanied by a file descriptor, so let's create more DMA regions
+ * that can fit in a message that can be handled by the server.
+ */
+ nr_dma_regions = server_max_fds << 1;
+
+ if ((fp = tmpfile()) == NULL) {
perror("failed to create DMA file");
return -1;
}
- if ((ret = ftruncate(dma_region_fds[0], 2 * sysconf(_SC_PAGESIZE))) == -1) {
+
+ if ((ret = ftruncate(fileno(fp), nr_dma_regions * sysconf(_SC_PAGESIZE))) == -1) {
perror("failed to truncate file");
return -1;
}
- dma_regions[0].addr = 0xdeadbeef;
- dma_regions[0].size = sysconf(_SC_PAGESIZE);
- dma_regions[0].offset = 0;
- dma_regions[0].prot = PROT_READ | PROT_WRITE;
- dma_regions[0].flags = VFIO_USER_F_DMA_REGION_MAPPABLE;
+ dma_regions = alloca(sizeof *dma_regions * nr_dma_regions);
+ dma_region_fds = alloca(sizeof *dma_region_fds * nr_dma_regions);
- dma_regions[1].addr = 0xcafebabe;
- dma_regions[1].size = sysconf(_SC_PAGESIZE);
- dma_regions[1].offset = dma_regions[0].size;
- dma_regions[1].prot = PROT_READ | PROT_WRITE;
- dma_regions[1].flags = VFIO_USER_F_DMA_REGION_MAPPABLE;
-
- dma_region_fds[1] = dma_region_fds[0];
-
- msg_id = 1;
- ret = send_vfio_user_msg(sock, msg_id, false, VFIO_USER_DMA_MAP, dma_regions,
- sizeof(dma_regions), dma_region_fds,
- ARRAY_SIZE(dma_region_fds));
-
- if (ret < 0) {
- fprintf(stderr, "failed to send DMA regions: %s\n", strerror(-ret));
- return ret;
+ for (i = 0; i < nr_dma_regions; i++) {
+ dma_regions[i].addr = i * sysconf(_SC_PAGESIZE);
+ dma_regions[i].size = sysconf(_SC_PAGESIZE);
+ dma_regions[i].offset = dma_regions[i].addr;
+ dma_regions[i].prot = PROT_READ | PROT_WRITE;
+ dma_regions[i].flags = VFIO_USER_F_DMA_REGION_MAPPABLE;
+ dma_region_fds[i] = fileno(fp);
}
- ret = recv_vfio_user_msg(sock, &hdr, true, &msg_id);
- if (ret < 0) {
- fprintf(stderr, "failed to receive response for mapping DMA regions: %s\n",
- strerror(-ret));
- return ret;
+
+ for (i = 0; i < nr_dma_regions / server_max_fds; i++, msg_id++) {
+ ret = send_vfio_user_msg(sock, msg_id, false, VFIO_USER_DMA_MAP,
+ dma_regions + (i * server_max_fds), sizeof *dma_regions * server_max_fds,
+ dma_region_fds + (i * server_max_fds), server_max_fds);
+ if (ret < 0) {
+ fprintf(stderr, "failed to send DMA regions: %s\n", strerror(-ret));
+ return ret;
+ }
+ ret = recv_vfio_user_msg(sock, &hdr, true, &msg_id);
+ if (ret < 0) {
+ fprintf(stderr, "failed to receive response for mapping DMA regions: %s\n",
+ strerror(-ret));
+ return ret;
+ }
}
return 0;
diff --git a/samples/gpio-pci-idio-16.c b/samples/gpio-pci-idio-16.c
index 3c87103..06320f3 100644
--- a/samples/gpio-pci-idio-16.c
+++ b/samples/gpio-pci-idio-16.c
@@ -65,6 +65,12 @@ static void _sa_handler(int signum __attribute__((unused)))
{
}
+static int
+unmap_dma(void *pvt __attribute__((unused)),
+ uint64_t iova __attribute__((unused)))
+{
+}
+
int main(int argc, char *argv[])
{
int ret;
@@ -105,6 +111,13 @@ int main(int argc, char *argv[])
.irq_count[LM_DEV_INTX_IRQ_IDX] = 1,
},
.uuid = argv[optind],
+
+ /*
+ * Not strictly necessary since this device doesn't yet do any DMA.
+ * By declaring this dummy callback DMA regions get registered,
+ * otherwise they're ignored.
+ */
+ .unmap_dma = unmap_dma
};
sigemptyset(&act.sa_mask);