diff options
author | William Henderson <william.henderson@nutanix.com> | 2023-07-20 15:32:38 +0000 |
---|---|---|
committer | John Levon <john.levon@nutanix.com> | 2023-09-15 12:59:39 +0100 |
commit | 8c0439512912305ff6ea0cde3ed2daa7f9dd3f82 (patch) | |
tree | 022f12c9604fcb4b2a9839c467b84fa651d6514b | |
parent | b7d248fa1834fd627f16337c4e56218cef4210ef (diff) | |
download | libvfio-user-8c0439512912305ff6ea0cde3ed2daa7f9dd3f82.zip libvfio-user-8c0439512912305ff6ea0cde3ed2daa7f9dd3f82.tar.gz libvfio-user-8c0439512912305ff6ea0cde3ed2daa7f9dd3f82.tar.bz2 |
feat: begin implementing DMA dirty page logging (WIP)
Signed-off-by: William Henderson <william.henderson@nutanix.com>
-rw-r--r-- | include/vfio-user.h | 32 | ||||
-rw-r--r-- | lib/dma.c | 141 | ||||
-rw-r--r-- | lib/dma.h | 12 | ||||
-rw-r--r-- | lib/libvfio-user.c | 225 | ||||
-rw-r--r-- | lib/migration.c | 15 | ||||
-rw-r--r-- | lib/migration.h | 4 |
6 files changed, 248 insertions, 181 deletions
diff --git a/include/vfio-user.h b/include/vfio-user.h index 39b1b32..aec5c06 100644 --- a/include/vfio-user.h +++ b/include/vfio-user.h @@ -206,25 +206,31 @@ typedef struct vfio_user_region_io_fds_reply { } sub_regions[]; } __attribute__((packed)) vfio_user_region_io_fds_reply_t; +struct vfio_user_device_feature_dma_logging_range { + uint64_t iova; + uint64_t length; +} __attribute__((packed)); -/* Analogous to vfio_iommu_type1_dirty_bitmap. */ -struct vfio_user_dirty_pages { - uint32_t argsz; -#ifndef VFIO_IOMMU_DIRTY_PAGES_FLAG_START -#define VFIO_IOMMU_DIRTY_PAGES_FLAG_START (1 << 0) -#define VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP (1 << 1) -#define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP (1 << 2) -#endif - uint32_t flags; +struct vfio_user_device_feature_dma_logging_control { + uint64_t page_size; + uint32_t num_ranges; + uint32_t reserved; + struct vfio_user_device_feature_dma_logging_range ranges[]; } __attribute__((packed)); -/* Analogous to struct vfio_iommu_type1_dirty_bitmap_get. */ -struct vfio_user_bitmap_range { +struct vfio_user_device_feature_dma_logging_report { uint64_t iova; - uint64_t size; - struct vfio_user_bitmap bitmap; + uint64_t length; + uint64_t page_size; + uint8_t bitmap[]; } __attribute__((packed)); +#ifndef VFIO_REGION_TYPE_MIGRATION_DEPRECATED +#define VFIO_DEVICE_FEATURE_DMA_LOGGING_START 6 +#define VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP 7 +#define VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT 8 +#endif + /* Analogous to vfio_device_feature */ struct vfio_user_device_feature { uint32_t argsz; @@ -288,6 +288,147 @@ dirty_page_logging_start_on_region(dma_memory_region_t *region, size_t pgsize) return 0; } +static dma_memory_region_t * +find_region(dma_controller_t *dma, uint64_t iova, uint64_t length) { + for (size_t i = 0; i < (size_t)dma->nregions; i++) { + if ((uint64_t)dma->regions[i].info.iova.iov_base == iova && + dma->regions[i].info.iova.iov_len == length) { + return &dma->regions[i]; + } + } + + return NULL; +} + +bool +is_dma_feature(uint32_t feature) { + switch (feature) { + case VFIO_DEVICE_FEATURE_DMA_LOGGING_START: + case VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP: + case VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT: + return true; + } + + return false; +} + +ssize_t +dma_get_request_bitmap_size(size_t length, void *buf) { + if (length != sizeof(struct vfio_user_device_feature_dma_logging_report)) { + return ERROR_INT(EINVAL); + } + + struct vfio_user_device_feature_dma_logging_report *req = buf; + + return get_bitmap_size(req->length, req->page_size); +} + +ssize_t +dma_feature_get(vfu_ctx_t *vfu_ctx, uint32_t feature, void *buf) +{ + assert(vfu_ctx != NULL); + + struct dma_controller *dma = vfu_ctx->dma; + + assert(dma != NULL); + + struct vfio_user_device_feature_dma_logging_report *req = buf; + + ssize_t bitmap_size = get_bitmap_size(req->length, req->page_size); + + int ret; + + ret = dma_controller_dirty_page_get(dma, + req->iova, + req->length, + req->page_size, + bitmap_size, + buf + sizeof(struct vfio_user_device_feature_dma_logging_report)); + + return -1; +} + +/* + * Currently we only support IOVA ranges that correspond exactly to a region. + * Also, once DMA logging has been started on a certain subset of the regions, + * it must be stopped on all of those regions at the same time before any other + * regions can start logging. + */ +ssize_t +dma_feature_set(vfu_ctx_t *vfu_ctx, uint32_t feature, void *buf) +{ + assert(vfu_ctx != NULL); + + struct dma_controller *dma = vfu_ctx->dma; + + assert(dma != NULL); + + struct vfio_user_device_feature_dma_logging_control *req = buf; + + if (feature == VFIO_DEVICE_FEATURE_DMA_LOGGING_START) { + if (req->page_size == 0) { + return ERROR_INT(EINVAL); + } + + if (dma->dirty_pgsize > 0) { + if (dma->dirty_pgsize != req->page_size) { + return ERROR_INT(EINVAL); + } + return 0; + } + + for (size_t i = 0; i < req->num_ranges; i++) { + dma_memory_region_t *region = find_region(dma, req->ranges[i].iova, + req->ranges[i].length); + + if (region == NULL) { + return ERROR_INT(EINVAL); + } + + if (dirty_page_logging_start_on_region(region, + req->page_size) < 0) { + int _errno = errno; + size_t j; + + for (j = 0; j < i; j++) { + region = find_region(dma, req->ranges[i].iova, + req->ranges[i].length); + free(region->dirty_bitmap); + region->dirty_bitmap = NULL; + } + + return ERROR_INT(_errno); + } + } + + dma->dirty_pgsize = req->page_size; + + vfu_log(dma->vfu_ctx, LOG_DEBUG, "dirty pages: started logging"); + } else if (feature == VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP) { + if (dma->dirty_pgsize == 0) { + return 0; + } + + for (size_t i = 0; i < req->num_ranges; i++) { + dma_memory_region_t *region = find_region(dma, req->ranges[i].iova, + req->ranges[i].length); + + if (region == NULL || region->dirty_bitmap == NULL) { + return ERROR_INT(EINVAL); + } + + free(region->dirty_bitmap); + region->dirty_bitmap = NULL; + } + + dma->dirty_pgsize = 0; + + vfu_log(dma->vfu_ctx, LOG_DEBUG, "dirty pages: stopped logging"); + } + + return 0; +} + int MOCK_DEFINE(dma_controller_add_region)(dma_controller_t *dma, vfu_dma_addr_t dma_addr, uint64_t size, @@ -135,6 +135,18 @@ MOCK_DECLARE(int, dma_controller_remove_region, dma_controller_t *dma, MOCK_DECLARE(void, dma_controller_unmap_region, dma_controller_t *dma, dma_memory_region_t *region); +bool +is_dma_feature(uint32_t feature); + +ssize_t +dma_get_request_bitmap_size(size_t length, void *buf); + +ssize_t +dma_feature_get(vfu_ctx_t *vfu_ctx, uint32_t feature, void *buf); + +ssize_t +dma_feature_set(vfu_ctx_t *vfu_ctx, uint32_t feature, void *buf); + // Helper for dma_addr_to_sgl() slow path. int _dma_addr_sg_split(const dma_controller_t *dma, diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c index 99914d2..6bc6623 100644 --- a/lib/libvfio-user.c +++ b/lib/libvfio-user.c @@ -906,123 +906,22 @@ device_reset(vfu_ctx_t *vfu_ctx, vfu_reset_type_t reason) return 0; } -static int -handle_dirty_pages_get(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) -{ - struct vfio_user_dirty_pages *dirty_pages_in; - struct vfio_user_dirty_pages *dirty_pages_out; - struct vfio_user_bitmap_range *range_in; - struct vfio_user_bitmap_range *range_out; - size_t argsz; - int ret; - - - dirty_pages_in = msg->in.iov.iov_base; - - if (msg->in.iov.iov_len < sizeof(*dirty_pages_in) + sizeof(*range_in) || - dirty_pages_in->argsz > SERVER_MAX_DATA_XFER_SIZE || - dirty_pages_in->argsz < sizeof(*dirty_pages_out)) { - vfu_log(vfu_ctx, LOG_ERR, "invalid message size=%zu argsz=%u", - msg->in.iov.iov_len, dirty_pages_in->argsz); - return ERROR_INT(EINVAL); - } - - range_in = msg->in.iov.iov_base + sizeof(*dirty_pages_in); - - /* - * range_in is client-controlled, but we only need to protect against - * overflow here: we'll take MIN() against a validated value next, and - * dma_controller_dirty_page_get() will validate the actual ->bitmap.size - * value later, anyway. - */ - argsz = satadd_u64(sizeof(*dirty_pages_out) + sizeof(*range_out), - range_in->bitmap.size); - - msg->out.iov.iov_len = MIN(dirty_pages_in->argsz, argsz); - msg->out.iov.iov_base = malloc(msg->out.iov.iov_len); - if (msg->out.iov.iov_base == NULL) { - return -1; - } - dirty_pages_out = msg->out.iov.iov_base; - memcpy(dirty_pages_out, dirty_pages_in, sizeof(*dirty_pages_out)); - dirty_pages_out->argsz = argsz; - - /* - * If the reply doesn't fit, reply with just the dirty pages header, giving - * the needed argsz. Typically this shouldn't happen, as the client knows - * the needed reply size and has already provided the correct bitmap size. - */ - if (dirty_pages_in->argsz >= argsz) { - void *bitmap_out = msg->out.iov.iov_base + sizeof(*dirty_pages_out) - + sizeof(*range_out); - range_out = msg->out.iov.iov_base + sizeof(*dirty_pages_out); - memcpy(range_out, range_in, sizeof(*range_out)); - ret = dma_controller_dirty_page_get(vfu_ctx->dma, - (vfu_dma_addr_t)(uintptr_t)range_in->iova, - range_in->size, - range_in->bitmap.pgsize, - range_in->bitmap.size, bitmap_out); - if (ret != 0) { - ret = errno; - vfu_log(vfu_ctx, LOG_WARNING, - "failed to get dirty bitmap from DMA controller: %m"); - free(msg->out.iov.iov_base); - msg->out.iov.iov_base = NULL; - msg->out.iov.iov_len = 0; - return ERROR_INT(ret); - } - } else { - vfu_log(vfu_ctx, LOG_ERR, - "dirty pages: get [%#llx, %#llx): buffer too small (%u < %zu)", - (ull_t)range_in->iova, (ull_t)range_in->iova + range_in->size, - dirty_pages_in->argsz, argsz); - } - - return 0; -} - -static int -handle_dirty_pages(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) -{ - struct vfio_user_dirty_pages *dirty_pages = msg->in.iov.iov_base; - int ret; - - assert(vfu_ctx != NULL); - assert(msg != NULL); - - if (msg->in.iov.iov_len < sizeof(*dirty_pages) || - dirty_pages->argsz < sizeof(*dirty_pages)) { - vfu_log(vfu_ctx, LOG_ERR, "invalid message size %zu", msg->in.iov.iov_len); - return ERROR_INT(EINVAL); - } - - if (vfu_ctx->migration == NULL) { - vfu_log(vfu_ctx, LOG_ERR, "migration not configured"); - return ERROR_INT(ENOTSUP); - } - - switch (dirty_pages->flags) { - case VFIO_IOMMU_DIRTY_PAGES_FLAG_START: - ret = dma_controller_dirty_page_logging_start(vfu_ctx->dma, - migration_get_pgsize(vfu_ctx->migration)); - break; - - case VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP: - dma_controller_dirty_page_logging_stop(vfu_ctx->dma); - ret = 0; - break; - - case VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP: - ret = handle_dirty_pages_get(vfu_ctx, msg); - break; - - default: - vfu_log(vfu_ctx, LOG_ERR, "bad flags %#x", dirty_pages->flags); - ret = ERROR_INT(EINVAL); - break; - } - - return ret; +static uint32_t +device_feature_flags(uint32_t feature) { + switch (feature) { + case VFIO_DEVICE_FEATURE_MIGRATION: + case VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT: + return VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_PROBE; + case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE: + return VFIO_DEVICE_FEATURE_GET + | VFIO_DEVICE_FEATURE_SET + | VFIO_DEVICE_FEATURE_PROBE; + case VFIO_DEVICE_FEATURE_DMA_LOGGING_START: + case VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP: + return VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_PROBE; + default: + return 0; + }; } static int @@ -1039,13 +938,15 @@ handle_device_feature(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) struct vfio_user_device_feature *req = msg->in.iov.iov_base; uint32_t supported_flags = - migration_feature_flags(req->flags & VFIO_DEVICE_FEATURE_MASK); + device_feature_flags(req->flags & VFIO_DEVICE_FEATURE_MASK); if ((req->flags & supported_flags) != (req->flags & ~VFIO_DEVICE_FEATURE_MASK) || supported_flags == 0) { return -EINVAL; } + uint32_t feature = req->flags & VFIO_DEVICE_FEATURE_MASK; + ssize_t ret; if (req->flags & VFIO_DEVICE_FEATURE_PROBE) { @@ -1061,31 +962,52 @@ handle_device_feature(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) ret = 0; } else if (req->flags & VFIO_DEVICE_FEATURE_GET) { - // all supported outgoing data is currently the same size as - // vfio_user_device_feature_migration - msg->out.iov.iov_len = sizeof(struct vfio_user_device_feature) - + sizeof(struct vfio_user_device_feature_migration); - msg->out.iov.iov_base = calloc(1, msg->out.iov.iov_len); + if (is_migration_feature(feature)) { + // all supported outgoing data is currently the same size as + // vfio_user_device_feature_migration + msg->out.iov.iov_len = sizeof(struct vfio_user_device_feature) + + sizeof(struct vfio_user_device_feature_migration); + msg->out.iov.iov_base = calloc(1, msg->out.iov.iov_len); - if (msg->out.iov.iov_base == NULL) { - return -1; - } + if (msg->out.iov.iov_base == NULL) { + return -1; + } - memcpy(msg->out.iov.iov_base, msg->in.iov.iov_base, - sizeof(struct vfio_user_device_feature)); + memcpy(msg->out.iov.iov_base, msg->in.iov.iov_base, + sizeof(struct vfio_user_device_feature)); - ret = migration_feature_get(vfu_ctx, - req->flags & VFIO_DEVICE_FEATURE_MASK, - msg->out.iov.iov_base + - sizeof(struct vfio_user_device_feature)); + ret = migration_feature_get(vfu_ctx, feature, + msg->out.iov.iov_base + + sizeof(struct vfio_user_device_feature)); - struct vfio_user_device_feature *res = msg->out.iov.iov_base; + struct vfio_user_device_feature *res = msg->out.iov.iov_base; - if (ret < 0) { - msg->out.iov.iov_len = 0; - } else { - res->argsz = sizeof(struct vfio_user_device_feature) - + sizeof(struct vfio_user_device_feature_migration); + if (ret < 0) { + msg->out.iov.iov_len = 0; + } else { + res->argsz = sizeof(struct vfio_user_device_feature) + + sizeof(struct vfio_user_device_feature_migration); + } + } else if (is_dma_feature(feature)) { + ssize_t bitmap_size = dma_get_request_bitmap_size( + req->argsz - sizeof(struct vfio_user_device_feature), + req->data + ); + + msg->out.iov.iov_len = sizeof(struct vfio_user_device_feature) + + bitmap_size; + msg->out.iov.iov_base = malloc(msg->out.iov.iov_len); + + if (msg->out.iov.iov_base == NULL) { + return -1; + } + + memcpy(msg->out.iov.iov_base, msg->in.iov.iov_base, + sizeof(struct vfio_user_device_feature)); + + struct vfio_user_device_feature *res = msg->out.iov.iov_base; + + ret = dma_feature_get(vfu_ctx, feature, res->data); } } else if (req->flags & VFIO_DEVICE_FEATURE_SET) { msg->out.iov.iov_base = malloc(msg->in.iov.iov_len); @@ -1098,9 +1020,13 @@ handle_device_feature(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) memcpy(msg->out.iov.iov_base, msg->in.iov.iov_base, msg->out.iov.iov_len); - ret = migration_feature_set(vfu_ctx, - req->flags & VFIO_DEVICE_FEATURE_MASK, - req->data); + struct vfio_user_device_feature *res = msg->out.iov.iov_base; + + if (is_migration_feature(feature)) { + ret = migration_feature_set(vfu_ctx, feature, res->data); + } else if (is_dma_feature(feature)) { + ret = dma_feature_set(vfu_ctx, feature, res->data); + } } return ret; @@ -1267,12 +1193,7 @@ handle_request(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) break; case VFIO_USER_DIRTY_PAGES: - // FIXME: don't allow migration calls if migration == NULL - if (vfu_ctx->dma != NULL) { - ret = handle_dirty_pages(vfu_ctx, msg); - } else { - ret = 0; - } + vfu_log(vfu_ctx, LOG_ERR, "VFIO_USER_DIRTY_PAGES deprecated"); break; case VFIO_USER_DEVICE_FEATURE: @@ -1438,16 +1359,6 @@ command_needs_quiesce(vfu_ctx_t *vfu_ctx, const vfu_msg_t *msg) case VFIO_USER_DEVICE_RESET: return true; - case VFIO_USER_DIRTY_PAGES: { - struct vfio_user_dirty_pages *dirty_pages = msg->in.iov.iov_base; - - if (msg->in.iov.iov_len < sizeof(*dirty_pages)) { - return false; - } - - return !(dirty_pages->flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP); - } - case VFIO_USER_REGION_WRITE: if (msg->in.iov.iov_len < sizeof(*reg)) { /* diff --git a/lib/migration.c b/lib/migration.c index 845144e..2f80ac0 100644 --- a/lib/migration.c +++ b/lib/migration.c @@ -172,18 +172,15 @@ MOCK_DEFINE(handle_device_state)(vfu_ctx_t *vfu_ctx, struct migration *migr, return migr_trans_to_valid_state(vfu_ctx, migr, device_state, notify); } -uint32_t -migration_feature_flags(uint32_t feature) { +bool +is_migration_feature(uint32_t feature) { switch (feature) { case VFIO_DEVICE_FEATURE_MIGRATION: - return VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_PROBE; case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE: - return VFIO_DEVICE_FEATURE_GET - | VFIO_DEVICE_FEATURE_SET - | VFIO_DEVICE_FEATURE_PROBE; - default: - return 0; - }; + return true; + } + + return false; } ssize_t diff --git a/lib/migration.h b/lib/migration.h index 4411228..221a345 100644 --- a/lib/migration.h +++ b/lib/migration.h @@ -48,8 +48,8 @@ struct migration * init_migration(const vfu_migration_callbacks_t *callbacks, uint64_t flags, int *err); -uint32_t -migration_feature_flags(uint32_t feature); +bool +is_migration_feature(uint32_t feature); ssize_t migration_feature_get(vfu_ctx_t *vfu_ctx, uint32_t feature, void *buf); |