author     William Henderson <william.henderson@nutanix.com>  2023-07-20 15:32:38 +0000
committer  John Levon <john.levon@nutanix.com>                2023-09-15 12:59:39 +0100
commit     8c0439512912305ff6ea0cde3ed2daa7f9dd3f82 (patch)
tree       022f12c9604fcb4b2a9839c467b84fa651d6514b
parent     b7d248fa1834fd627f16337c4e56218cef4210ef (diff)
feat: begin implementing DMA dirty page logging (WIP)
Signed-off-by: William Henderson <william.henderson@nutanix.com>
-rw-r--r--  include/vfio-user.h    32
-rw-r--r--  lib/dma.c             141
-rw-r--r--  lib/dma.h              12
-rw-r--r--  lib/libvfio-user.c    225
-rw-r--r--  lib/migration.c        15
-rw-r--r--  lib/migration.h         4
6 files changed, 248 insertions(+), 181 deletions(-)
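
This patch replaces the old VFIO_USER_DIRTY_PAGES message with the VFIO device-feature interface: logging is started and stopped with VFIO_DEVICE_FEATURE_DMA_LOGGING_START/STOP (feature SET) and the dirty bitmap is read back with VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT (feature GET). For orientation, a hedged client-side sketch of building a START payload follows; it assumes the vfio_user_device_feature header keeps its existing argsz/flags/data layout, and the builder function, message framing and socket I/O around it are illustrative, not part of the patch.

/*
 * Illustrative sketch only: assemble the body of a VFIO_USER_DEVICE_FEATURE
 * request that starts dirty page logging on a single IOVA range. The
 * vfio_user_header and socket transport are omitted.
 */
#include <stdint.h>
#include <stdlib.h>

#include "vfio-user.h" /* structs and feature numbers added by this patch */

static struct vfio_user_device_feature *
build_dma_logging_start(uint64_t iova, uint64_t length, uint64_t page_size,
                        size_t *total_size)
{
    size_t payload =
        sizeof(struct vfio_user_device_feature_dma_logging_control) +
        sizeof(struct vfio_user_device_feature_dma_logging_range);
    size_t total = sizeof(struct vfio_user_device_feature) + payload;

    struct vfio_user_device_feature *req = calloc(1, total);

    if (req == NULL) {
        return NULL;
    }

    /* The feature number occupies the low bits; SET selects the operation. */
    req->argsz = total;
    req->flags = VFIO_DEVICE_FEATURE_DMA_LOGGING_START | VFIO_DEVICE_FEATURE_SET;

    struct vfio_user_device_feature_dma_logging_control *ctl =
        (struct vfio_user_device_feature_dma_logging_control *)req->data;

    ctl->page_size = page_size;
    ctl->num_ranges = 1;
    ctl->ranges[0].iova = iova;
    ctl->ranges[0].length = length;

    *total_size = total;
    return req;
}

Stopping is symmetric: the same control payload is sent with VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP, covering exactly the ranges that were started.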
diff --git a/include/vfio-user.h b/include/vfio-user.h
index 39b1b32..aec5c06 100644
--- a/include/vfio-user.h
+++ b/include/vfio-user.h
@@ -206,25 +206,31 @@ typedef struct vfio_user_region_io_fds_reply {
} sub_regions[];
} __attribute__((packed)) vfio_user_region_io_fds_reply_t;
+struct vfio_user_device_feature_dma_logging_range {
+ uint64_t iova;
+ uint64_t length;
+} __attribute__((packed));
-/* Analogous to vfio_iommu_type1_dirty_bitmap. */
-struct vfio_user_dirty_pages {
- uint32_t argsz;
-#ifndef VFIO_IOMMU_DIRTY_PAGES_FLAG_START
-#define VFIO_IOMMU_DIRTY_PAGES_FLAG_START (1 << 0)
-#define VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP (1 << 1)
-#define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP (1 << 2)
-#endif
- uint32_t flags;
+struct vfio_user_device_feature_dma_logging_control {
+ uint64_t page_size;
+ uint32_t num_ranges;
+ uint32_t reserved;
+ struct vfio_user_device_feature_dma_logging_range ranges[];
} __attribute__((packed));
-/* Analogous to struct vfio_iommu_type1_dirty_bitmap_get. */
-struct vfio_user_bitmap_range {
+struct vfio_user_device_feature_dma_logging_report {
uint64_t iova;
- uint64_t size;
- struct vfio_user_bitmap bitmap;
+ uint64_t length;
+ uint64_t page_size;
+ uint8_t bitmap[];
} __attribute__((packed));
+#ifndef VFIO_REGION_TYPE_MIGRATION_DEPRECATED
+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_START 6
+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP 7
+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT 8
+#endif
+
/* Analogous to vfio_device_feature */
struct vfio_user_device_feature {
uint32_t argsz;
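
The new vfio_user_device_feature_dma_logging_report describes the bitmap that a REPORT reply carries immediately after it: one bit per page_size-sized page of the queried range. A minimal sketch of how a client might test a single IOVA against such a reply, assuming the usual VFIO convention of least-significant-bit-first ordering within each bitmap byte:

#include <stdbool.h>
#include <stdint.h>

#include "vfio-user.h"

/* Sketch only: one bit per page, LSB-first within each byte (assumed). */
static bool
page_is_dirty(const struct vfio_user_device_feature_dma_logging_report *rep,
              uint64_t iova)
{
    uint64_t page = (iova - rep->iova) / rep->page_size;

    return rep->bitmap[page / 8] & (1u << (page % 8));
}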
diff --git a/lib/dma.c b/lib/dma.c
index beefeac..d70628a 100644
--- a/lib/dma.c
+++ b/lib/dma.c
@@ -288,6 +288,147 @@ dirty_page_logging_start_on_region(dma_memory_region_t *region, size_t pgsize)
return 0;
}
+static dma_memory_region_t *
+find_region(dma_controller_t *dma, uint64_t iova, uint64_t length) {
+ for (size_t i = 0; i < (size_t)dma->nregions; i++) {
+ if ((uint64_t)dma->regions[i].info.iova.iov_base == iova &&
+ dma->regions[i].info.iova.iov_len == length) {
+ return &dma->regions[i];
+ }
+ }
+
+ return NULL;
+}
+
+bool
+is_dma_feature(uint32_t feature) {
+ switch (feature) {
+ case VFIO_DEVICE_FEATURE_DMA_LOGGING_START:
+ case VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP:
+ case VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT:
+ return true;
+ }
+
+ return false;
+}
+
+ssize_t
+dma_get_request_bitmap_size(size_t length, void *buf) {
+ if (length != sizeof(struct vfio_user_device_feature_dma_logging_report)) {
+ return ERROR_INT(EINVAL);
+ }
+
+ struct vfio_user_device_feature_dma_logging_report *req = buf;
+
+ return get_bitmap_size(req->length, req->page_size);
+}
+
+ssize_t
+dma_feature_get(vfu_ctx_t *vfu_ctx, uint32_t feature, void *buf)
+{
+ assert(vfu_ctx != NULL);
+
+ struct dma_controller *dma = vfu_ctx->dma;
+
+ assert(dma != NULL);
+
+ struct vfio_user_device_feature_dma_logging_report *req = buf;
+
+    ssize_t bitmap_size = get_bitmap_size(req->length, req->page_size);
+
+    if (bitmap_size < 0) {
+        return bitmap_size;
+    }
+
+    return dma_controller_dirty_page_get(dma,
+                    (vfu_dma_addr_t)(uintptr_t)req->iova,
+                    req->length,
+                    req->page_size,
+                    bitmap_size,
+                    buf + sizeof(struct vfio_user_device_feature_dma_logging_report));
+}
+
+/*
+ * Currently we only support IOVA ranges that correspond exactly to a region.
+ * Also, once DMA logging has been started on a certain subset of the regions,
+ * it must be stopped on all of those regions at the same time before any other
+ * regions can start logging.
+ */
+ssize_t
+dma_feature_set(vfu_ctx_t *vfu_ctx, uint32_t feature, void *buf)
+{
+ assert(vfu_ctx != NULL);
+
+ struct dma_controller *dma = vfu_ctx->dma;
+
+ assert(dma != NULL);
+
+ struct vfio_user_device_feature_dma_logging_control *req = buf;
+
+ if (feature == VFIO_DEVICE_FEATURE_DMA_LOGGING_START) {
+ if (req->page_size == 0) {
+ return ERROR_INT(EINVAL);
+ }
+
+ if (dma->dirty_pgsize > 0) {
+ if (dma->dirty_pgsize != req->page_size) {
+ return ERROR_INT(EINVAL);
+ }
+ return 0;
+ }
+
+ for (size_t i = 0; i < req->num_ranges; i++) {
+ dma_memory_region_t *region = find_region(dma, req->ranges[i].iova,
+ req->ranges[i].length);
+
+ if (region == NULL) {
+ return ERROR_INT(EINVAL);
+ }
+
+ if (dirty_page_logging_start_on_region(region,
+ req->page_size) < 0) {
+ int _errno = errno;
+ size_t j;
+
+ for (j = 0; j < i; j++) {
+                    region = find_region(dma, req->ranges[j].iova,
+                                          req->ranges[j].length);
+ free(region->dirty_bitmap);
+ region->dirty_bitmap = NULL;
+ }
+
+ return ERROR_INT(_errno);
+ }
+ }
+
+ dma->dirty_pgsize = req->page_size;
+
+ vfu_log(dma->vfu_ctx, LOG_DEBUG, "dirty pages: started logging");
+ } else if (feature == VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP) {
+ if (dma->dirty_pgsize == 0) {
+ return 0;
+ }
+
+ for (size_t i = 0; i < req->num_ranges; i++) {
+ dma_memory_region_t *region = find_region(dma, req->ranges[i].iova,
+ req->ranges[i].length);
+
+ if (region == NULL || region->dirty_bitmap == NULL) {
+ return ERROR_INT(EINVAL);
+ }
+
+ free(region->dirty_bitmap);
+ region->dirty_bitmap = NULL;
+ }
+
+ dma->dirty_pgsize = 0;
+
+ vfu_log(dma->vfu_ctx, LOG_DEBUG, "dirty pages: stopped logging");
+ }
+
+ return 0;
+}
+
int
MOCK_DEFINE(dma_controller_add_region)(dma_controller_t *dma,
vfu_dma_addr_t dma_addr, uint64_t size,
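
Both dma_get_request_bitmap_size() and dma_feature_get() above defer the sizing arithmetic to get_bitmap_size(), which is not part of this diff. Assuming it follows the obvious rule, one bit per page rounded up to whole bytes of the uint8_t bitmap, a stand-in (named bitmap_size_for here so as not to claim it matches the library helper) would be:

#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <sys/types.h>

/* Sketch only: bytes needed for a one-bit-per-page dirty bitmap. */
static ssize_t
bitmap_size_for(uint64_t length, uint64_t page_size)
{
    if (page_size == 0 || length < page_size) {
        errno = EINVAL;
        return -1;
    }

    /* Round the page count up, then the bit count up to whole bytes. */
    uint64_t pages = (length + page_size - 1) / page_size;

    return (pages + CHAR_BIT - 1) / CHAR_BIT;
}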
diff --git a/lib/dma.h b/lib/dma.h
index 9687f49..f7091f7 100644
--- a/lib/dma.h
+++ b/lib/dma.h
@@ -135,6 +135,18 @@ MOCK_DECLARE(int, dma_controller_remove_region, dma_controller_t *dma,
MOCK_DECLARE(void, dma_controller_unmap_region, dma_controller_t *dma,
dma_memory_region_t *region);
+bool
+is_dma_feature(uint32_t feature);
+
+ssize_t
+dma_get_request_bitmap_size(size_t length, void *buf);
+
+ssize_t
+dma_feature_get(vfu_ctx_t *vfu_ctx, uint32_t feature, void *buf);
+
+ssize_t
+dma_feature_set(vfu_ctx_t *vfu_ctx, uint32_t feature, void *buf);
+
// Helper for dma_addr_to_sgl() slow path.
int
_dma_addr_sg_split(const dma_controller_t *dma,
diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c
index 99914d2..6bc6623 100644
--- a/lib/libvfio-user.c
+++ b/lib/libvfio-user.c
@@ -906,123 +906,22 @@ device_reset(vfu_ctx_t *vfu_ctx, vfu_reset_type_t reason)
return 0;
}
-static int
-handle_dirty_pages_get(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg)
-{
- struct vfio_user_dirty_pages *dirty_pages_in;
- struct vfio_user_dirty_pages *dirty_pages_out;
- struct vfio_user_bitmap_range *range_in;
- struct vfio_user_bitmap_range *range_out;
- size_t argsz;
- int ret;
-
-
- dirty_pages_in = msg->in.iov.iov_base;
-
- if (msg->in.iov.iov_len < sizeof(*dirty_pages_in) + sizeof(*range_in) ||
- dirty_pages_in->argsz > SERVER_MAX_DATA_XFER_SIZE ||
- dirty_pages_in->argsz < sizeof(*dirty_pages_out)) {
- vfu_log(vfu_ctx, LOG_ERR, "invalid message size=%zu argsz=%u",
- msg->in.iov.iov_len, dirty_pages_in->argsz);
- return ERROR_INT(EINVAL);
- }
-
- range_in = msg->in.iov.iov_base + sizeof(*dirty_pages_in);
-
- /*
- * range_in is client-controlled, but we only need to protect against
- * overflow here: we'll take MIN() against a validated value next, and
- * dma_controller_dirty_page_get() will validate the actual ->bitmap.size
- * value later, anyway.
- */
- argsz = satadd_u64(sizeof(*dirty_pages_out) + sizeof(*range_out),
- range_in->bitmap.size);
-
- msg->out.iov.iov_len = MIN(dirty_pages_in->argsz, argsz);
- msg->out.iov.iov_base = malloc(msg->out.iov.iov_len);
- if (msg->out.iov.iov_base == NULL) {
- return -1;
- }
- dirty_pages_out = msg->out.iov.iov_base;
- memcpy(dirty_pages_out, dirty_pages_in, sizeof(*dirty_pages_out));
- dirty_pages_out->argsz = argsz;
-
- /*
- * If the reply doesn't fit, reply with just the dirty pages header, giving
- * the needed argsz. Typically this shouldn't happen, as the client knows
- * the needed reply size and has already provided the correct bitmap size.
- */
- if (dirty_pages_in->argsz >= argsz) {
- void *bitmap_out = msg->out.iov.iov_base + sizeof(*dirty_pages_out)
- + sizeof(*range_out);
- range_out = msg->out.iov.iov_base + sizeof(*dirty_pages_out);
- memcpy(range_out, range_in, sizeof(*range_out));
- ret = dma_controller_dirty_page_get(vfu_ctx->dma,
- (vfu_dma_addr_t)(uintptr_t)range_in->iova,
- range_in->size,
- range_in->bitmap.pgsize,
- range_in->bitmap.size, bitmap_out);
- if (ret != 0) {
- ret = errno;
- vfu_log(vfu_ctx, LOG_WARNING,
- "failed to get dirty bitmap from DMA controller: %m");
- free(msg->out.iov.iov_base);
- msg->out.iov.iov_base = NULL;
- msg->out.iov.iov_len = 0;
- return ERROR_INT(ret);
- }
- } else {
- vfu_log(vfu_ctx, LOG_ERR,
- "dirty pages: get [%#llx, %#llx): buffer too small (%u < %zu)",
- (ull_t)range_in->iova, (ull_t)range_in->iova + range_in->size,
- dirty_pages_in->argsz, argsz);
- }
-
- return 0;
-}
-
-static int
-handle_dirty_pages(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg)
-{
- struct vfio_user_dirty_pages *dirty_pages = msg->in.iov.iov_base;
- int ret;
-
- assert(vfu_ctx != NULL);
- assert(msg != NULL);
-
- if (msg->in.iov.iov_len < sizeof(*dirty_pages) ||
- dirty_pages->argsz < sizeof(*dirty_pages)) {
- vfu_log(vfu_ctx, LOG_ERR, "invalid message size %zu", msg->in.iov.iov_len);
- return ERROR_INT(EINVAL);
- }
-
- if (vfu_ctx->migration == NULL) {
- vfu_log(vfu_ctx, LOG_ERR, "migration not configured");
- return ERROR_INT(ENOTSUP);
- }
-
- switch (dirty_pages->flags) {
- case VFIO_IOMMU_DIRTY_PAGES_FLAG_START:
- ret = dma_controller_dirty_page_logging_start(vfu_ctx->dma,
- migration_get_pgsize(vfu_ctx->migration));
- break;
-
- case VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP:
- dma_controller_dirty_page_logging_stop(vfu_ctx->dma);
- ret = 0;
- break;
-
- case VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP:
- ret = handle_dirty_pages_get(vfu_ctx, msg);
- break;
-
- default:
- vfu_log(vfu_ctx, LOG_ERR, "bad flags %#x", dirty_pages->flags);
- ret = ERROR_INT(EINVAL);
- break;
- }
-
- return ret;
+static uint32_t
+device_feature_flags(uint32_t feature) {
+ switch (feature) {
+ case VFIO_DEVICE_FEATURE_MIGRATION:
+ case VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT:
+ return VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_PROBE;
+ case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE:
+ return VFIO_DEVICE_FEATURE_GET
+ | VFIO_DEVICE_FEATURE_SET
+ | VFIO_DEVICE_FEATURE_PROBE;
+ case VFIO_DEVICE_FEATURE_DMA_LOGGING_START:
+ case VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP:
+ return VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_PROBE;
+ default:
+ return 0;
+ };
}
static int
@@ -1039,13 +938,15 @@ handle_device_feature(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg)
struct vfio_user_device_feature *req = msg->in.iov.iov_base;
uint32_t supported_flags =
- migration_feature_flags(req->flags & VFIO_DEVICE_FEATURE_MASK);
+ device_feature_flags(req->flags & VFIO_DEVICE_FEATURE_MASK);
if ((req->flags & supported_flags) !=
(req->flags & ~VFIO_DEVICE_FEATURE_MASK) || supported_flags == 0) {
return -EINVAL;
}
+ uint32_t feature = req->flags & VFIO_DEVICE_FEATURE_MASK;
+
ssize_t ret;
if (req->flags & VFIO_DEVICE_FEATURE_PROBE) {
@@ -1061,31 +962,52 @@ handle_device_feature(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg)
ret = 0;
} else if (req->flags & VFIO_DEVICE_FEATURE_GET) {
- // all supported outgoing data is currently the same size as
- // vfio_user_device_feature_migration
- msg->out.iov.iov_len = sizeof(struct vfio_user_device_feature)
- + sizeof(struct vfio_user_device_feature_migration);
- msg->out.iov.iov_base = calloc(1, msg->out.iov.iov_len);
+ if (is_migration_feature(feature)) {
+ // all supported outgoing data is currently the same size as
+ // vfio_user_device_feature_migration
+ msg->out.iov.iov_len = sizeof(struct vfio_user_device_feature)
+ + sizeof(struct vfio_user_device_feature_migration);
+ msg->out.iov.iov_base = calloc(1, msg->out.iov.iov_len);
- if (msg->out.iov.iov_base == NULL) {
- return -1;
- }
+ if (msg->out.iov.iov_base == NULL) {
+ return -1;
+ }
- memcpy(msg->out.iov.iov_base, msg->in.iov.iov_base,
- sizeof(struct vfio_user_device_feature));
+ memcpy(msg->out.iov.iov_base, msg->in.iov.iov_base,
+ sizeof(struct vfio_user_device_feature));
- ret = migration_feature_get(vfu_ctx,
- req->flags & VFIO_DEVICE_FEATURE_MASK,
- msg->out.iov.iov_base +
- sizeof(struct vfio_user_device_feature));
+ ret = migration_feature_get(vfu_ctx, feature,
+ msg->out.iov.iov_base +
+ sizeof(struct vfio_user_device_feature));
- struct vfio_user_device_feature *res = msg->out.iov.iov_base;
+ struct vfio_user_device_feature *res = msg->out.iov.iov_base;
- if (ret < 0) {
- msg->out.iov.iov_len = 0;
- } else {
- res->argsz = sizeof(struct vfio_user_device_feature)
- + sizeof(struct vfio_user_device_feature_migration);
+ if (ret < 0) {
+ msg->out.iov.iov_len = 0;
+ } else {
+ res->argsz = sizeof(struct vfio_user_device_feature)
+ + sizeof(struct vfio_user_device_feature_migration);
+ }
+ } else if (is_dma_feature(feature)) {
+            ssize_t bitmap_size = dma_get_request_bitmap_size(
+                req->argsz - sizeof(struct vfio_user_device_feature),
+                req->data
+            );
+
+            if (bitmap_size < 0) {
+                return -1;
+            }
+
+ msg->out.iov.iov_len = sizeof(struct vfio_user_device_feature)
+ + bitmap_size;
+ msg->out.iov.iov_base = malloc(msg->out.iov.iov_len);
+
+ if (msg->out.iov.iov_base == NULL) {
+ return -1;
+ }
+
+ memcpy(msg->out.iov.iov_base, msg->in.iov.iov_base,
+ sizeof(struct vfio_user_device_feature));
+
+ struct vfio_user_device_feature *res = msg->out.iov.iov_base;
+
+ ret = dma_feature_get(vfu_ctx, feature, res->data);
}
} else if (req->flags & VFIO_DEVICE_FEATURE_SET) {
msg->out.iov.iov_base = malloc(msg->in.iov.iov_len);
@@ -1098,9 +1020,13 @@ handle_device_feature(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg)
memcpy(msg->out.iov.iov_base, msg->in.iov.iov_base,
msg->out.iov.iov_len);
- ret = migration_feature_set(vfu_ctx,
- req->flags & VFIO_DEVICE_FEATURE_MASK,
- req->data);
+ struct vfio_user_device_feature *res = msg->out.iov.iov_base;
+
+ if (is_migration_feature(feature)) {
+ ret = migration_feature_set(vfu_ctx, feature, res->data);
+ } else if (is_dma_feature(feature)) {
+ ret = dma_feature_set(vfu_ctx, feature, res->data);
+ }
}
return ret;
@@ -1267,12 +1193,7 @@ handle_request(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg)
break;
case VFIO_USER_DIRTY_PAGES:
- // FIXME: don't allow migration calls if migration == NULL
- if (vfu_ctx->dma != NULL) {
- ret = handle_dirty_pages(vfu_ctx, msg);
- } else {
- ret = 0;
- }
+        vfu_log(vfu_ctx, LOG_ERR, "VFIO_USER_DIRTY_PAGES deprecated");
+        ret = ERROR_INT(ENOTSUP);
break;
case VFIO_USER_DEVICE_FEATURE:
@@ -1438,16 +1359,6 @@ command_needs_quiesce(vfu_ctx_t *vfu_ctx, const vfu_msg_t *msg)
case VFIO_USER_DEVICE_RESET:
return true;
- case VFIO_USER_DIRTY_PAGES: {
- struct vfio_user_dirty_pages *dirty_pages = msg->in.iov.iov_base;
-
- if (msg->in.iov.iov_len < sizeof(*dirty_pages)) {
- return false;
- }
-
- return !(dirty_pages->flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP);
- }
-
case VFIO_USER_REGION_WRITE:
if (msg->in.iov.iov_len < sizeof(*reg)) {
/*
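
The GET path above sizes its reply from the report header the client supplies: argsz minus the vfio_user_device_feature header must be exactly sizeof(struct vfio_user_device_feature_dma_logging_report), and the server appends the bitmap straight after the copied-back header. A hedged sketch of a matching client request (the builder name and surrounding framing are illustrative):

#include <stdint.h>
#include <stdlib.h>

#include "vfio-user.h"

/* Illustrative only: request the dirty bitmap for one logged IOVA range. */
static struct vfio_user_device_feature *
build_dma_logging_report(uint64_t iova, uint64_t length, uint64_t page_size,
                         size_t *total_size)
{
    size_t total = sizeof(struct vfio_user_device_feature) +
                   sizeof(struct vfio_user_device_feature_dma_logging_report);

    struct vfio_user_device_feature *req = calloc(1, total);

    if (req == NULL) {
        return NULL;
    }

    req->argsz = total;
    req->flags = VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT | VFIO_DEVICE_FEATURE_GET;

    struct vfio_user_device_feature_dma_logging_report *rep =
        (struct vfio_user_device_feature_dma_logging_report *)req->data;

    rep->iova = iova;
    rep->length = length;
    rep->page_size = page_size;

    *total_size = total;
    return req;
}

The reply then carries the same header followed by the bitmap bytes, which a helper like page_is_dirty() from the earlier sketch can index into.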
diff --git a/lib/migration.c b/lib/migration.c
index 845144e..2f80ac0 100644
--- a/lib/migration.c
+++ b/lib/migration.c
@@ -172,18 +172,15 @@ MOCK_DEFINE(handle_device_state)(vfu_ctx_t *vfu_ctx, struct migration *migr,
return migr_trans_to_valid_state(vfu_ctx, migr, device_state, notify);
}
-uint32_t
-migration_feature_flags(uint32_t feature) {
+bool
+is_migration_feature(uint32_t feature) {
switch (feature) {
case VFIO_DEVICE_FEATURE_MIGRATION:
- return VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_PROBE;
case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE:
- return VFIO_DEVICE_FEATURE_GET
- | VFIO_DEVICE_FEATURE_SET
- | VFIO_DEVICE_FEATURE_PROBE;
- default:
- return 0;
- };
+ return true;
+ }
+
+ return false;
}
ssize_t
diff --git a/lib/migration.h b/lib/migration.h
index 4411228..221a345 100644
--- a/lib/migration.h
+++ b/lib/migration.h
@@ -48,8 +48,8 @@ struct migration *
init_migration(const vfu_migration_callbacks_t *callbacks,
uint64_t flags, int *err);
-uint32_t
-migration_feature_flags(uint32_t feature);
+bool
+is_migration_feature(uint32_t feature);
ssize_t
migration_feature_get(vfu_ctx_t *vfu_ctx, uint32_t feature, void *buf);