diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/dma.c | 47 | ||||
-rw-r--r-- | lib/dma.h | 40 | ||||
-rw-r--r-- | lib/libvfio-user.c | 110 |
3 files changed, 126 insertions, 71 deletions
@@ -44,6 +44,17 @@ #include "dma.h" #include "private.h" +size_t +dma_sg_size(void) +{ + return sizeof(dma_sg_t); +} + +bool +dma_sg_is_mappable(const dma_controller_t *dma, const dma_sg_t *sg) { + return sg->region[dma->regions].info.vaddr != NULL; +} + static inline ssize_t fd_get_blocksize(int fd) { @@ -88,6 +99,7 @@ dma_controller_create(vfu_ctx_t *vfu_ctx, size_t max_regions, size_t max_size) dma->nregions = 0; memset(dma->regions, 0, max_regions * sizeof(dma->regions[0])); dma->dirty_pgsize = 0; + LIST_INIT(&dma->maps); return dma; } @@ -463,7 +475,24 @@ out: return cnt; } -int dma_controller_dirty_page_logging_start(dma_controller_t *dma, size_t pgsize) +static void +dma_mark_dirty_sgs(dma_controller_t *dma) +{ + struct dma_sg *sg; + + if (dma->dirty_pgsize == 0) { + return; + } + + LIST_FOREACH(sg, &dma->maps, entry) { + if (sg->writeable) { + _dma_mark_dirty(dma, &dma->regions[sg->region], sg); + } + } +} + +int +dma_controller_dirty_page_logging_start(dma_controller_t *dma, size_t pgsize) { size_t i; @@ -495,6 +524,9 @@ int dma_controller_dirty_page_logging_start(dma_controller_t *dma, size_t pgsize } } dma->dirty_pgsize = pgsize; + + dma_mark_dirty_sgs(dma); + return 0; } @@ -519,7 +551,7 @@ dma_controller_dirty_page_logging_stop(dma_controller_t *dma) int dma_controller_dirty_page_get(dma_controller_t *dma, vfu_dma_addr_t addr, uint64_t len, size_t pgsize, size_t size, - char **data) + char *bitmap) { int ret; ssize_t bitmap_size; @@ -527,7 +559,7 @@ dma_controller_dirty_page_get(dma_controller_t *dma, vfu_dma_addr_t addr, dma_memory_region_t *region; assert(dma != NULL); - assert(data != NULL); + assert(bitmap != NULL); /* * FIXME for now we support IOVAs that match exactly the DMA region. This @@ -562,7 +594,14 @@ dma_controller_dirty_page_get(dma_controller_t *dma, vfu_dma_addr_t addr, region = &dma->regions[sg.region]; - *data = region->dirty_bitmap; + /* + * TODO race condition between resetting bitmap and user calling + * vfu_map_sg/vfu_unmap_sg(). + */ + memcpy(bitmap, region->dirty_bitmap, size); + memset(region->dirty_bitmap, 0, size); + + dma_mark_dirty_sgs(dma); return 0; } @@ -73,6 +73,7 @@ #include <stdlib.h> #include <limits.h> #include <errno.h> +#include <sys/queue.h> #include "libvfio-user.h" #include "common.h" @@ -82,6 +83,15 @@ struct vfu_ctx; +struct dma_sg { + vfu_dma_addr_t dma_addr; + int region; + uint64_t length; + uint64_t offset; + bool writeable; + LIST_ENTRY(dma_sg) entry; +}; + typedef struct { vfu_dma_info_t info; int fd; // File descriptor to mmap @@ -96,6 +106,7 @@ typedef struct dma_controller { int nregions; struct vfu_ctx *vfu_ctx; size_t dirty_pgsize; // Dirty page granularity + LIST_HEAD(, dma_sg) maps; dma_memory_region_t regions[0]; } dma_controller_t; @@ -132,14 +143,6 @@ _dma_addr_sg_split(const dma_controller_t *dma, vfu_dma_addr_t dma_addr, uint64_t len, dma_sg_t *sg, int max_sg, int prot); -static bool -_dma_should_mark_dirty(const dma_controller_t *dma, int prot) -{ - assert(dma != NULL); - - return (prot & PROT_WRITE) == PROT_WRITE && dma->dirty_pgsize > 0; -} - static void _dma_mark_dirty(const dma_controller_t *dma, const dma_memory_region_t *region, dma_sg_t *sg) @@ -172,10 +175,7 @@ dma_init_sg(const dma_controller_t *dma, dma_sg_t *sg, vfu_dma_addr_t dma_addr, sg->region = region_index; sg->offset = dma_addr - region->info.iova.iov_base; sg->length = len; - if (_dma_should_mark_dirty(dma, prot)) { - _dma_mark_dirty(dma, region, sg); - } - sg->mappable = (region->info.vaddr != NULL); + sg->writeable = prot & PROT_WRITE; return 0; } @@ -225,7 +225,7 @@ dma_addr_to_sg(const dma_controller_t *dma, } static inline int -dma_map_sg(dma_controller_t *dma, const dma_sg_t *sg, struct iovec *iov, +dma_map_sg(dma_controller_t *dma, dma_sg_t *sg, struct iovec *iov, int cnt) { dma_memory_region_t *region; @@ -245,6 +245,12 @@ dma_map_sg(dma_controller_t *dma, const dma_sg_t *sg, struct iovec *iov, return ERROR_INT(EFAULT); } + if (sg->writeable) { + if (dma->dirty_pgsize > 0) { + _dma_mark_dirty(dma, region, sg); + } + LIST_INSERT_HEAD(&dma->maps, &sg[i], entry); + } vfu_log(dma->vfu_ctx, LOG_DEBUG, "map %p-%p", sg->dma_addr + sg->offset, sg->dma_addr + sg->offset + sg->length); @@ -276,6 +282,9 @@ dma_unmap_sg(dma_controller_t *dma, const dma_sg_t *sg, /* bad region */ continue; } + if (sg->writeable) { + LIST_REMOVE(sg, entry); + } vfu_log(dma->vfu_ctx, LOG_DEBUG, "unmap %p-%p", sg[i].dma_addr + sg[i].offset, sg[i].dma_addr + sg[i].offset + sg[i].length); @@ -292,7 +301,10 @@ dma_controller_dirty_page_logging_stop(dma_controller_t *dma); int dma_controller_dirty_page_get(dma_controller_t *dma, vfu_dma_addr_t addr, uint64_t len, size_t pgsize, size_t size, - char **data); + char *bitmap); +bool +dma_sg_is_mappable(const dma_controller_t *dma, const dma_sg_t *sg); + #endif /* LIB_VFIO_USER_DMA_H */ diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c index 40eb010..bbef62a 100644 --- a/lib/libvfio-user.c +++ b/lib/libvfio-user.c @@ -544,7 +544,6 @@ handle_dma_unmap(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg, struct vfio_user_dma_unmap *dma_unmap) { int ret; - char *bitmap = NULL; char rstr[1024]; assert(vfu_ctx != NULL); @@ -583,22 +582,14 @@ handle_dma_unmap(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg, * temporary anyway since we're moving dirty page tracking out of * the DMA controller. */ - ret = dma_controller_dirty_page_get(vfu_ctx->dma, - (vfu_dma_addr_t)dma_unmap->addr, - dma_unmap->size, - dma_unmap->bitmap->pgsize, - dma_unmap->bitmap->size, - &bitmap); - if (ret < 0) { - vfu_log(vfu_ctx, LOG_ERR, "failed to get dirty page bitmap: %m"); - return -1; - } msg->out_size += sizeof(*dma_unmap->bitmap) + dma_unmap->bitmap->size; } else if (dma_unmap->flags != 0) { vfu_log(vfu_ctx, LOG_ERR, "bad flags=%#x", dma_unmap->flags); return ERROR_INT(ENOTSUP); } + + msg->out_data = malloc(msg->out_size); if (msg->out_data == NULL) { return ERROR_INT(ENOMEM); @@ -607,7 +598,16 @@ handle_dma_unmap(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg, if (dma_unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) { memcpy(msg->out_data + sizeof(*dma_unmap), dma_unmap->bitmap, sizeof(*dma_unmap->bitmap)); - memcpy(msg->out_data + sizeof(*dma_unmap) + sizeof(*dma_unmap->bitmap), bitmap, dma_unmap->bitmap->size); + ret = dma_controller_dirty_page_get(vfu_ctx->dma, + (vfu_dma_addr_t)dma_unmap->addr, + dma_unmap->size, + dma_unmap->bitmap->pgsize, + dma_unmap->bitmap->size, + msg->out_data + sizeof(*dma_unmap) + sizeof(*dma_unmap->bitmap)); + if (ret < 0) { + vfu_log(vfu_ctx, LOG_ERR, "failed to get dirty page bitmap: %m"); + return -1; + } } ret = dma_controller_remove_region(vfu_ctx->dma, @@ -655,64 +655,58 @@ handle_dirty_pages_get(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) struct vfio_user_dirty_pages *dirty_pages_out; struct vfio_user_bitmap_range *range_in; struct vfio_user_bitmap_range *range_out; - char *bitmap; size_t argsz; int ret; - if (msg->in_size < sizeof(*dirty_pages_in) + sizeof(*range_in)) { - vfu_log(vfu_ctx, LOG_ERR, "invalid message size %zu", msg->in_size); - return ERROR_INT(EINVAL); - } dirty_pages_in = msg->in_data; - range_in = msg->in_data + sizeof(*dirty_pages_in); - ret = dma_controller_dirty_page_get(vfu_ctx->dma, - (vfu_dma_addr_t)range_in->iova, - range_in->size, range_in->bitmap.pgsize, - range_in->bitmap.size, &bitmap); - if (ret != 0) { - vfu_log(vfu_ctx, LOG_WARNING, - "failed to get dirty bitmap from DMA controller: %m"); - return -1; + if (msg->in_size < sizeof(*dirty_pages_in) + sizeof(*range_in) + || dirty_pages_in->argsz < sizeof(*dirty_pages_out)) { + vfu_log(vfu_ctx, LOG_ERR, "invalid message size=%zu argsz=%u", + msg->in_size, dirty_pages_in->argsz); + return ERROR_INT(EINVAL); } + range_in = msg->in_data + sizeof(*dirty_pages_in); + /* NB: this is bound by MAX_DMA_SIZE. */ argsz = sizeof(*dirty_pages_out) + sizeof(*range_out) + range_in->bitmap.size; - - /* - * If the reply doesn't fit, reply with just the dirty pages header, giving - * the needed argsz. Typically this shouldn't happen, as the client knows - * the needed reply size and has already provided the correct bitmap size. - */ - if (dirty_pages_in->argsz >= argsz) { - msg->out_size = argsz; - } else { - msg->out_size = sizeof(*dirty_pages_out); - } - + msg->out_size = MIN(dirty_pages_in->argsz, argsz); msg->out_data = malloc(msg->out_size); - if (msg->out_data == NULL) { return -1; } - dirty_pages_out = msg->out_data; - memcpy(dirty_pages_out, dirty_pages_in, sizeof (*dirty_pages_out)); + memcpy(dirty_pages_out, dirty_pages_in, sizeof(*dirty_pages_out)); dirty_pages_out->argsz = argsz; + /* + * If the reply doesn't fit, reply with just the dirty pages header, giving + * the needed argsz. Typically this shouldn't happen, as the client knows + * the needed reply size and has already provided the correct bitmap size. + */ if (dirty_pages_in->argsz >= argsz) { - char *bitmap_out; - + void *bitmap_out = msg->out_data + sizeof(*dirty_pages_out) + + sizeof(*range_out); range_out = msg->out_data + sizeof(*dirty_pages_out); - memcpy(range_out, range_in, sizeof (*range_out)); - - bitmap_out = msg->out_data + sizeof(*dirty_pages_out) - + sizeof(*range_out); - memcpy(bitmap_out, bitmap, range_in->bitmap.size); + memcpy(range_out, range_in, sizeof(*range_out)); + ret = dma_controller_dirty_page_get(vfu_ctx->dma, + (vfu_dma_addr_t)range_in->iova, + range_in->size, + range_in->bitmap.pgsize, + range_in->bitmap.size, bitmap_out); + if (ret != 0) { + ret = errno; + vfu_log(vfu_ctx, LOG_WARNING, + "failed to get dirty bitmap from DMA controller: %m"); + free(msg->out_data); + msg->out_data = NULL; + msg->out_size = 0; + return ERROR_INT(ret); + } } - return 0; } @@ -1588,12 +1582,12 @@ vfu_addr_to_sg(vfu_ctx_t *vfu_ctx, vfu_dma_addr_t dma_addr, } EXPORT int -vfu_map_sg(vfu_ctx_t *vfu_ctx, const dma_sg_t *sg, - struct iovec *iov, int cnt) +vfu_map_sg(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, struct iovec *iov, int cnt, + int flags) { int ret; - if (unlikely(vfu_ctx->dma_unregister == NULL)) { + if (unlikely(vfu_ctx->dma_unregister == NULL) || flags != 0) { return ERROR_INT(EINVAL); } @@ -1606,7 +1600,7 @@ vfu_map_sg(vfu_ctx_t *vfu_ctx, const dma_sg_t *sg, } EXPORT void -vfu_unmap_sg(vfu_ctx_t *vfu_ctx, const dma_sg_t *sg, struct iovec *iov, int cnt) +vfu_unmap_sg(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, struct iovec *iov, int cnt) { if (unlikely(vfu_ctx->dma_unregister == NULL)) { return; @@ -1630,6 +1624,10 @@ vfu_dma_transfer(vfu_ctx_t *vfu_ctx, enum vfio_user_command cmd, assert(vfu_ctx != NULL); assert(sg != NULL); + if (cmd == VFIO_USER_DMA_WRITE && !sg->writeable) { + return ERROR_INT(EPERM); + } + rlen = sizeof(struct vfio_user_dma_region_access) + MIN(sg->length, vfu_ctx->client_max_data_xfer_size); @@ -1715,4 +1713,10 @@ vfu_dma_write(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data) return vfu_dma_transfer(vfu_ctx, VFIO_USER_DMA_WRITE, sg, data); } +EXPORT bool +vfu_sg_is_mappable(vfu_ctx_t *vfu_ctx, dma_sg_t *sg) +{ + return dma_sg_is_mappable(vfu_ctx->dma, sg); +} + /* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ |