aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
author: Thanos Makatos <thanos.makatos@nutanix.com> 2020-07-08 09:54:26 -0400
committer: Thanos Makatos <thanos.makatos@nutanix.com> 2020-07-08 09:54:26 -0400
commit: f7fa79676d6b25b77cc4fd8d54ffbfa2fa96b2bf (patch)
tree: 79728c4703195995284e3c57c479012d97f82ee3 /lib
parent: 115d6188f48f58b16e70aa8447f47e3f219acc4d (diff)
downloadlibvfio-user-f7fa79676d6b25b77cc4fd8d54ffbfa2fa96b2bf.zip
libvfio-user-f7fa79676d6b25b77cc4fd8d54ffbfa2fa96b2bf.tar.gz
libvfio-user-f7fa79676d6b25b77cc4fd8d54ffbfa2fa96b2bf.tar.bz2
dma: notify device of DMA removal events
This patch introduces a new device callback that is called when a DMA region is removed. The device must provide this callback, and by the time the callback returns the device must have released all references to that DMA region. This new functionality is necessary because a DMA region can be removed at any time, so the device must be given a chance to drop all of its references and perform the necessary cleanup. This patch also stores lm_ctx_t inside dma_controller_t so that we don't have to pass lm_ctx_t when calling dma_XXX functions.
Signed-off-by: Thanos Makatos <thanos.makatos@nutanix.com>
Diffstat (limited to 'lib')
-rw-r--r-- lib/dma.c 76
-rw-r--r-- lib/dma.h 43
-rw-r--r-- lib/libmuser.c 19
-rw-r--r-- lib/muser.h 7
4 files changed, 106 insertions, 39 deletions
diff --git a/lib/dma.c b/lib/dma.c
index cc79807..56c3d39 100644
--- a/lib/dma.c
+++ b/lib/dma.c
@@ -66,7 +66,7 @@ fds_are_same_file(int fd1, int fd2)
}
dma_controller_t *
-dma_controller_create(int max_regions)
+dma_controller_create(lm_ctx_t *lm_ctx, int max_regions)
{
dma_controller_t *dma;
@@ -77,6 +77,7 @@ dma_controller_create(int max_regions)
return dma;
}
+ dma->lm_ctx = lm_ctx;
dma->max_regions = max_regions;
dma->nregions = 0;
memset(dma->regions, 0, max_regions * sizeof(dma->regions[0]));
@@ -85,36 +86,53 @@ dma_controller_create(int max_regions)
}
static void
-_dma_controller_do_remove_region(lm_ctx_t *lm_ctx, dma_memory_region_t *region)
+_dma_controller_do_remove_region(dma_controller_t *dma,
+ dma_memory_region_t *region)
{
int err;
- assert(region);
+
+ assert(dma != NULL);
+ assert(region != NULL);
+
err = dma_unmap_region(region, region->virt_addr, region->size);
if (err != 0) {
- lm_log(lm_ctx, LM_DBG, "failed to unmap fd=%d vaddr=%#lx-%#lx\n",
+ lm_log(dma->lm_ctx, LM_DBG, "failed to unmap fd=%d vaddr=%#lx-%#lx\n",
region->fd, region->virt_addr, region->size);
}
if (region->fd != -1) {
if (close(region->fd) == -1) {
- lm_log(lm_ctx, LM_DBG, "failed to close fd %d: %m\n", region->fd);
+ lm_log(dma->lm_ctx, LM_DBG, "failed to close fd %d: %m\n", region->fd);
}
}
}
/* FIXME not thread safe */
int
-dma_controller_remove_region(lm_ctx_t *lm_ctx, dma_controller_t *dma,
- dma_addr_t dma_addr, size_t size)
+dma_controller_remove_region(dma_controller_t *dma,
+ dma_addr_t dma_addr, size_t size,
+ int (*unmap_dma) (void*, uint64_t), void *data)
{
int idx;
dma_memory_region_t *region;
+ int err;
- assert(dma);
+ assert(dma != NULL);
for (idx = 0; idx < dma->nregions; idx++) {
region = &dma->regions[idx];
if (region->dma_addr == dma_addr && region->size == size) {
- _dma_controller_do_remove_region(lm_ctx, region);
+ if (region->refcnt > 0) {
+ err = unmap_dma(data, region->dma_addr);
+ if (err != 0) {
+ lm_log(dma->lm_ctx, LM_ERR,
+ "failed to notify of removal of DMA region %#lx-%#lx: %s\n",
+ region->dma_addr, region->dma_addr + region->size,
+ strerror(-err));
+ return err;
+ }
+ assert(region->refcnt > 0);
+ }
+ _dma_controller_do_remove_region(dma, region);
if (dma->nregions > 1)
/*
* FIXME valgrind complains with 'Source and destination overlap in memcpy',
@@ -130,7 +148,7 @@ dma_controller_remove_region(lm_ctx_t *lm_ctx, dma_controller_t *dma,
}
static inline void
-dma_controller_remove_regions(lm_ctx_t *ctx, dma_controller_t *dma)
+dma_controller_remove_regions(dma_controller_t *dma)
{
int i;
@@ -139,25 +157,25 @@ dma_controller_remove_regions(lm_ctx_t *ctx, dma_controller_t *dma)
for (i = 0; i < dma->nregions; i++) {
dma_memory_region_t *region = &dma->regions[i];
- lm_log(ctx, LM_INF, "unmap vaddr=%lx IOVA=%lx\n",
+ lm_log(dma->lm_ctx, LM_INF, "unmap vaddr=%#lx IOVA=%#lx\n",
region->virt_addr, region->dma_addr);
- _dma_controller_do_remove_region(ctx, region);
+ _dma_controller_do_remove_region(dma, region);
}
}
void
-dma_controller_destroy(lm_ctx_t *lm_ctx, dma_controller_t *dma)
+dma_controller_destroy(dma_controller_t *dma)
{
if (dma == NULL) {
return;
}
- dma_controller_remove_regions(lm_ctx, dma);
+ dma_controller_remove_regions(dma);
free(dma);
}
int
-dma_controller_add_region(lm_ctx_t *lm_ctx, dma_controller_t *dma,
+dma_controller_add_region(dma_controller_t *dma,
dma_addr_t dma_addr, size_t size,
int fd, off_t offset)
{
@@ -171,8 +189,8 @@ dma_controller_add_region(lm_ctx_t *lm_ctx, dma_controller_t *dma,
/* First check if this is the same exact region. */
if (region->dma_addr == dma_addr && region->size == size) {
if (offset != region->offset) {
- lm_log(lm_ctx, LM_ERR, "bad offset for new DMA region %lx+%lx, "
- "want=%d, existing=%d\n",
+ lm_log(dma->lm_ctx, LM_ERR,
+ "bad offset for new DMA region %#lx+%#lx, want=%d, existing=%d\n",
dma_addr, size, offset, region->offset);
goto err;
}
@@ -183,8 +201,9 @@ dma_controller_add_region(lm_ctx_t *lm_ctx, dma_controller_t *dma,
* the same file, however in the majority of cases we'll be
* using a single fd.
*/
- lm_log(lm_ctx, LM_ERR, "bad fd=%d for new DMA region %lx-%lx, "
- "existing fd=%d\n", fd, region->fd);
+ lm_log(dma->lm_ctx, LM_ERR,
+ "bad fd=%d for new DMA region %#lx-%#lx, existing fd=%d\n",
+ fd, region->fd);
goto err;
}
return idx;
@@ -195,16 +214,17 @@ dma_controller_add_region(lm_ctx_t *lm_ctx, dma_controller_t *dma,
dma_addr < region->dma_addr + region->size) ||
(region->dma_addr >= dma_addr &&
region->dma_addr < dma_addr + size)) {
- lm_log(lm_ctx, LM_INF, "new DMA region %lx+%lx overlaps with DMA "
- "region %lx-%lx\n", dma_addr, size, region->dma_addr,
- region->size);
+ lm_log(dma->lm_ctx, LM_INF,
+ "new DMA region %#lx+%#lx overlaps with DMA region %#lx-%#lx\n",
+ dma_addr, size, region->dma_addr, region->size);
goto err;
}
}
if (dma->nregions == dma->max_regions) {
idx = dma->max_regions;
- lm_log(lm_ctx, LM_ERR, "reached maxed regions, recompile with higher number of DMA regions\n");
+ lm_log(dma->lm_ctx, LM_ERR,
+ "reached maxed regions, recompile with higher number of DMA regions\n");
goto err;
}
@@ -213,7 +233,7 @@ dma_controller_add_region(lm_ctx_t *lm_ctx, dma_controller_t *dma,
page_size = fd_get_blocksize(fd);
if (page_size < 0) {
- lm_log(lm_ctx, LM_ERR, "bad page size %d\n", page_size);
+ lm_log(dma->lm_ctx, LM_ERR, "bad page size %d\n", page_size);
goto err;
}
page_size = MAX(page_size, getpagesize());
@@ -223,15 +243,18 @@ dma_controller_add_region(lm_ctx_t *lm_ctx, dma_controller_t *dma,
region->page_size = page_size;
region->offset = offset;
region->fd = fd;
+ region->refcnt = 0;
region->virt_addr = dma_map_region(region, PROT_READ | PROT_WRITE,
0, region->size);
if (region->virt_addr == MAP_FAILED) {
- lm_log(lm_ctx, LM_ERR, "failed to memory map DMA region %lx-%lx: %s\n",
+ lm_log(dma->lm_ctx, LM_ERR,
+ "failed to memory map DMA region %#lx-%#lx: %s\n",
dma_addr, dma_addr + size, strerror(errno));
if (region->fd != -1) {
if (close(region->fd) == -1) {
- lm_log(lm_ctx, LM_DBG, "failed to close fd %d: %m\n", region->fd);
+ lm_log(dma->lm_ctx, LM_DBG, "failed to close fd %d: %m\n",
+ region->fd);
}
}
goto err;
@@ -304,6 +327,7 @@ _dma_addr_sg_split(const dma_controller_t *dma,
size_t region_len = MIN(region_end - dma_addr, len);
if (cnt < max_sg) {
+ sg[cnt].dma_addr = region->dma_addr;
sg[cnt].region = idx;
sg[cnt].offset = dma_addr - region->dma_addr;
sg[cnt].length = region_len;
diff --git a/lib/dma.h b/lib/dma.h
index 3dfe318..8eb6210 100644
--- a/lib/dma.h
+++ b/lib/dma.h
@@ -72,6 +72,8 @@
#include "muser.h"
#include "common.h"
+struct lm_ctx;
+
typedef struct {
dma_addr_t dma_addr; // DMA address of this region
size_t size; // Size of this region
@@ -79,19 +81,21 @@ typedef struct {
int page_size; // Page size of this fd
off_t offset; // File offset
void *virt_addr; // Virtual address of this region
+ int refcnt; // Number of users of this region
} dma_memory_region_t;
typedef struct {
int max_regions;
int nregions;
+ struct lm_ctx *lm_ctx;
dma_memory_region_t regions[0];
} dma_controller_t;
dma_controller_t *
-dma_controller_create(int max_regions);
+dma_controller_create(lm_ctx_t *lm_ctx, int max_regions);
void
-dma_controller_destroy(lm_ctx_t *ctx, dma_controller_t *dma);
+dma_controller_destroy(dma_controller_t *dma);
/* Registers a new memory region.
* Returns:
@@ -101,13 +105,14 @@ dma_controller_destroy(lm_ctx_t *ctx, dma_controller_t *dma);
* (e.g. due to conflict with existing region).
*/
int
-dma_controller_add_region(lm_ctx_t *ctx, dma_controller_t *dma,
+dma_controller_add_region(dma_controller_t *dma,
dma_addr_t dma_addr, size_t size,
int fd, off_t offset);
int
-dma_controller_remove_region(lm_ctx_t *ctx, dma_controller_t *dma,
- dma_addr_t dma_addr, size_t size);
+dma_controller_remove_region(dma_controller_t *dma,
+ dma_addr_t dma_addr, size_t size,
+ int (*unmap_dma) (void*, uint64_t), void *data);
// Helper for dma_addr_to_sg() slow path.
int
@@ -141,6 +146,7 @@ dma_addr_to_sg(const dma_controller_t *dma,
if (likely(max_sg > 0 && len > 0 &&
dma_addr >= region->dma_addr && dma_addr + len <= region_end &&
region_hint < dma->nregions)) {
+ sg->dma_addr = region->dma_addr;
sg->region = region_hint;
sg->offset = dma_addr - region->dma_addr;
sg->length = len;
@@ -169,9 +175,12 @@ dma_map_sg(dma_controller_t *dma, const dma_sg_t *sg, struct iovec *iov,
int i;
for (i = 0; i < cnt; i++) {
+ lm_log(dma->lm_ctx, LM_DBG, "map %#lx-%#lx\n",
+ sg->dma_addr + sg->offset, sg->dma_addr + sg->offset + sg->length);
region = &dma->regions[sg[i].region];
iov[i].iov_base = region->virt_addr + sg[i].offset;
iov[i].iov_len = sg[i].length;
+ region->refcnt++;
}
return 0;
@@ -180,10 +189,28 @@ dma_map_sg(dma_controller_t *dma, const dma_sg_t *sg, struct iovec *iov,
#define UNUSED __attribute__((unused))
static inline void
-dma_unmap_sg(UNUSED dma_controller_t *dma, UNUSED const dma_sg_t *sg,
- UNUSED struct iovec *iov, UNUSED int cnt)
+dma_unmap_sg(dma_controller_t *dma, const dma_sg_t *sg,
+ UNUSED struct iovec *iov, int cnt)
{
- /* just a placeholder for now */
+ int i;
+
+ for (i = 0; i < cnt; i++) {
+ dma_memory_region_t *r;
+ /*
+ * FIXME this double loop will be removed if we replace the array with
+ * tfind(3)
+ */
+ for (r = dma->regions;
+ r < dma->regions + dma->nregions && r->dma_addr != sg[i].dma_addr;
+ r++);
+ if (r > dma->regions + dma->nregions) {
+ /* bad region */
+ continue;
+ }
+ lm_log(dma->lm_ctx, LM_DBG, "unmap %#lx-%#lx\n",
+ sg[i].dma_addr + sg[i].offset, sg[i].dma_addr + sg[i].offset + sg[i].length);
+ r->refcnt--;
+ }
return;
}
diff --git a/lib/libmuser.c b/lib/libmuser.c
index 3cd5f6a..7cd6792 100644
--- a/lib/libmuser.c
+++ b/lib/libmuser.c
@@ -90,6 +90,7 @@ struct lm_ctx {
struct caps *caps;
uint64_t flags;
char *uuid;
+ int (*unmap_dma) (void *pvt, uint64_t iova);
/* LM_TRANS_SOCK */
char *iommu_dir;
@@ -835,9 +836,10 @@ muser_dma_unmap(lm_ctx_t *lm_ctx, struct muser_cmd *cmd)
return -EINVAL;
}
- err = dma_controller_remove_region(lm_ctx, lm_ctx->dma,
+ err = dma_controller_remove_region(lm_ctx->dma,
cmd->mmap.request.addr,
- cmd->mmap.request.len);
+ cmd->mmap.request.len,
+ lm_ctx->unmap_dma, lm_ctx->pvt);
if (err != 0 && err != -ENOENT) {
lm_log(lm_ctx, LM_ERR, "failed to remove DMA region %#lx-%#lx: %s\n",
cmd->mmap.request.addr,
@@ -866,7 +868,7 @@ muser_dma_map(lm_ctx_t *lm_ctx, struct muser_cmd *cmd)
return -EINVAL;
}
- err = dma_controller_add_region(lm_ctx, lm_ctx->dma,
+ err = dma_controller_add_region(lm_ctx->dma,
cmd->mmap.request.addr,
cmd->mmap.request.len,
cmd->mmap.request.fd,
@@ -1460,7 +1462,7 @@ lm_ctx_destroy(lm_ctx_t *lm_ctx)
free(lm_ctx->pci_config_space);
transports_ops[lm_ctx->trans].detach(lm_ctx->fd);
if (lm_ctx->dma != NULL) {
- dma_controller_destroy(lm_ctx, lm_ctx->dma);
+ dma_controller_destroy(lm_ctx->dma);
}
free_sparse_mmap_areas(lm_ctx->pci_info.reg_info);
free(lm_ctx->caps);
@@ -1615,6 +1617,11 @@ lm_ctx_create(const lm_dev_info_t *dev_info)
return NULL;
}
+ if (dev_info->unmap_dma == NULL) {
+ errno = EINVAL;
+ return NULL;
+ }
+
/*
* FIXME need to check that the number of MSI and MSI-X IRQs are valid
* (1, 2, 4, 8, 16 or 32 for MSI and up to 2048 for MSI-X).
@@ -1692,8 +1699,10 @@ lm_ctx_create(const lm_dev_info_t *dev_info)
}
}
+ lm_ctx->unmap_dma = dev_info->unmap_dma;
+
// Create the internal DMA controller.
- lm_ctx->dma = dma_controller_create(LM_DMA_REGIONS);
+ lm_ctx->dma = dma_controller_create(lm_ctx, LM_DMA_REGIONS);
if (lm_ctx->dma == NULL) {
err = errno;
goto out;
diff --git a/lib/muser.h b/lib/muser.h
index 9c6743f..d7ab4b3 100644
--- a/lib/muser.h
+++ b/lib/muser.h
@@ -60,6 +60,7 @@
typedef uint64_t dma_addr_t;
typedef struct {
+ dma_addr_t dma_addr;
int region;
int length;
uint64_t offset;
@@ -302,6 +303,12 @@ typedef struct {
int (*reset) (void *pvt);
/*
+ * Function that is called when the guest unmaps a DMA region. The device
+ * must release all references to that region before the callback returns.
+ */
+ int (*unmap_dma) (void *pvt, uint64_t iova);
+
+ /*
* PCI capabilities. The user needs to only define the ID and size of each
* capability. The actual capability is not maintained by libmuser. When a
* capability is accessed the appropriate callback function is called.