aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorThanos Makatos <thanos.makatos@nutanix.com>2021-05-20 17:10:51 +0100
committerGitHub <noreply@github.com>2021-05-20 17:10:51 +0100
commitcfe9901919943f14961e1da1c4a823336ff79555 (patch)
tree2b5d8915bbbea8d239684e3334c9926fb910fc2c /lib
parent947941de95bf2c3f723b37151d67fb129fd01841 (diff)
downloadlibvfio-user-cfe9901919943f14961e1da1c4a823336ff79555.zip
libvfio-user-cfe9901919943f14961e1da1c4a823336ff79555.tar.gz
libvfio-user-cfe9901919943f14961e1da1c4a823336ff79555.tar.bz2
migration: various dirty page tracking fixes (#457)
- document how to use a vfio-user device with libvirt - document how to use SPDK's nvmf/vfio-user target with libvirt - replace vfio_bitmap with vfio_user_bitmap and vfio_iommu_type1_dirty_bitmap_get with vfio_user_bitmap_range - fix bug for calculating number of pages needed for dirty page bitmap - align number of bytes for dirty page bitmap to QWORD - add debug messages around dirty page tracking - only support flags=0 when doing DMA unmap - set device state to running after reset - allow region read/write even if device is in stopped state - allow transitioning from stopped/stop-and-copy state to running state - fix unit tests Signed-off-by: Thanos Makatos <thanos.makatos@nutanix.com> Reviewed-by: John Levon <john.levon@nutanix.com>
Diffstat (limited to 'lib')
-rw-r--r--lib/dma.c6
-rw-r--r--lib/libvfio-user.c63
-rw-r--r--lib/migration.c170
-rw-r--r--lib/migration.h7
-rw-r--r--lib/migration_priv.h20
-rw-r--r--lib/private.h7
6 files changed, 190 insertions, 83 deletions
diff --git a/lib/dma.c b/lib/dma.c
index 32f786b..e6d04b3 100644
--- a/lib/dma.c
+++ b/lib/dma.c
@@ -423,7 +423,7 @@ get_bitmap_size(size_t region_size, size_t pgsize)
return ERROR_INT(EINVAL);
}
size_t nr_pages = (region_size / pgsize) + (region_size % pgsize != 0);
- return (nr_pages / CHAR_BIT) + (nr_pages % CHAR_BIT != 0);
+ return ROUND_UP(nr_pages, sizeof(uint64_t) * CHAR_BIT) / CHAR_BIT;
}
int dma_controller_dirty_page_logging_start(dma_controller_t *dma, size_t pgsize)
@@ -511,11 +511,13 @@ dma_controller_dirty_page_get(dma_controller_t *dma, vfu_dma_addr_t addr,
}
if (pgsize != dma->dirty_pgsize) {
+ vfu_log(dma->vfu_ctx, LOG_ERR, "bad page size %ld", pgsize);
return ERROR_INT(EINVAL);
}
bitmap_size = get_bitmap_size(len, pgsize);
if (bitmap_size < 0) {
+ vfu_log(dma->vfu_ctx, LOG_ERR, "failed to get bitmap size");
return bitmap_size;
}
@@ -524,6 +526,8 @@ dma_controller_dirty_page_get(dma_controller_t *dma, vfu_dma_addr_t addr,
* expects to receive.
*/
if (size != (size_t)bitmap_size) {
+ vfu_log(dma->vfu_ctx, LOG_ERR, "bad bitmap size %ld != %ld", size,
+ bitmap_size);
return ERROR_INT(EINVAL);
}
diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c
index 7518a46..c4f6c42 100644
--- a/lib/libvfio-user.c
+++ b/lib/libvfio-user.c
@@ -543,6 +543,11 @@ handle_dma_map_or_unmap(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg)
ret = 0;
} else {
+
+ if (region->flags != 0) {
+ vfu_log(vfu_ctx, LOG_ERR, "bad flags=%#x", region->flags);
+ return ERROR_INT(ENOTSUP);
+ }
ret = dma_controller_remove_region(vfu_ctx->dma,
(void *)region->addr,
region->size,
@@ -560,20 +565,34 @@ handle_dma_map_or_unmap(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg)
}
static int
-handle_device_reset(vfu_ctx_t *vfu_ctx)
+do_device_reset(vfu_ctx_t *vfu_ctx, vfu_reset_type_t reason)
{
- vfu_log(vfu_ctx, LOG_DEBUG, "Device reset called by client");
+ int ret;
+
if (vfu_ctx->reset != NULL) {
- return vfu_ctx->reset(vfu_ctx, VFU_RESET_DEVICE);
+ ret = vfu_ctx->reset(vfu_ctx, reason);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+ if (vfu_ctx->migration != NULL) {
+ return handle_device_state(vfu_ctx, vfu_ctx->migration,
+ VFIO_DEVICE_STATE_RUNNING, false);
}
return 0;
}
-static int
-handle_dirty_pages_get(vfu_ctx_t *vfu_ctx,
- struct iovec **iovecs, size_t *nr_iovecs,
- struct vfio_iommu_type1_dirty_bitmap_get *ranges,
- uint32_t size)
+int
+handle_device_reset(vfu_ctx_t *vfu_ctx, vfu_reset_type_t reason)
+{
+ return do_device_reset(vfu_ctx, reason);
+}
+
+int
+MOCK_DEFINE(handle_dirty_pages_get)(vfu_ctx_t *vfu_ctx,
+ struct iovec **iovecs, size_t *nr_iovecs,
+ struct vfio_user_bitmap_range *ranges,
+ uint32_t size)
{
int ret = EINVAL;
size_t i;
@@ -583,23 +602,25 @@ handle_dirty_pages_get(vfu_ctx_t *vfu_ctx,
assert(nr_iovecs != NULL);
assert(ranges != NULL);
- if (size % sizeof(struct vfio_iommu_type1_dirty_bitmap_get) != 0) {
+ if (size % sizeof(struct vfio_user_bitmap_range) != 0) {
return ERROR_INT(EINVAL);
}
- *nr_iovecs = size / sizeof(struct vfio_iommu_type1_dirty_bitmap_get);
+ *nr_iovecs = size / sizeof(struct vfio_user_bitmap_range);
*iovecs = malloc(*nr_iovecs * sizeof(struct iovec));
if (*iovecs == NULL) {
return -1;
}
for (i = 0; i < *nr_iovecs; i++) {
- struct vfio_iommu_type1_dirty_bitmap_get *r = &ranges[i];
+ struct vfio_user_bitmap_range *r = &ranges[i];
ret = dma_controller_dirty_page_get(vfu_ctx->dma,
(vfu_dma_addr_t)r->iova, r->size,
r->bitmap.pgsize, r->bitmap.size,
(char **)&((*iovecs)[i].iov_base));
if (ret != 0) {
ret = errno;
+ vfu_log(vfu_ctx, LOG_WARNING,
+ "failed to get dirty bitmap from DMA controller: %m");
goto out;
}
(*iovecs)[i].iov_len = r->bitmap.size;
@@ -644,7 +665,7 @@ MOCK_DEFINE(handle_dirty_pages)(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg)
break;
case VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP: {
- struct vfio_iommu_type1_dirty_bitmap_get *get;
+ struct vfio_user_bitmap_range *get;
get = (void *)(dirty_bitmap + 1);
ret = handle_dirty_pages_get(vfu_ctx, &msg->out_iovecs,
@@ -829,11 +850,12 @@ MOCK_DEFINE(should_exec_command)(vfu_ctx_t *vfu_ctx, uint16_t cmd)
"bad command %d while device in stop-and-copy state", cmd);
return false;
}
- } else if (device_is_stopped(vfu_ctx->migration) &&
- cmd != VFIO_USER_DIRTY_PAGES) {
- vfu_log(vfu_ctx, LOG_ERR,
- "bad command %d while device in stopped state", cmd);
- return false;
+ } else if (device_is_stopped(vfu_ctx->migration)) {
+ if (!cmd_allowed_when_stopped_and_copying(cmd)) {
+ vfu_log(vfu_ctx, LOG_ERR,
+ "bad command %d while device in stopped state", cmd);
+ return false;
+ }
}
return true;
}
@@ -873,7 +895,8 @@ MOCK_DEFINE(exec_command)(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg)
break;
case VFIO_USER_DEVICE_RESET:
- ret = handle_device_reset(vfu_ctx);
+ vfu_log(vfu_ctx, LOG_INFO, "device reset by client");
+ ret = handle_device_reset(vfu_ctx, VFU_RESET_DEVICE);
break;
case VFIO_USER_DIRTY_PAGES:
@@ -1091,9 +1114,7 @@ vfu_reset_ctx(vfu_ctx_t *vfu_ctx, const char *reason)
vfu_ctx);
}
- if (vfu_ctx->reset != NULL) {
- vfu_ctx->reset(vfu_ctx, VFU_RESET_LOST_CONN);
- }
+ do_device_reset(vfu_ctx, VFU_RESET_LOST_CONN);
if (vfu_ctx->irqs != NULL) {
irqs_reset(vfu_ctx);
diff --git a/lib/migration.c b/lib/migration.c
index 7294403..8f0706a 100644
--- a/lib/migration.c
+++ b/lib/migration.c
@@ -40,7 +40,7 @@
#include "migration_priv.h"
bool
-vfio_migr_state_transition_is_valid(uint32_t from, uint32_t to)
+MOCK_DEFINE(vfio_migr_state_transition_is_valid)(uint32_t from, uint32_t to)
{
return migr_states[from].state & (1 << to);
}
@@ -98,48 +98,23 @@ init_migration(const vfu_migration_callbacks_t * callbacks,
return migr;
}
-static void
-migr_state_transition(struct migration *migr, enum migr_iter_state state)
+void
+MOCK_DEFINE(migr_state_transition)(struct migration *migr,
+ enum migr_iter_state state)
{
assert(migr != NULL);
/* FIXME validate that state transition */
migr->iter.state = state;
}
-static ssize_t
-handle_device_state(vfu_ctx_t *vfu_ctx, struct migration *migr,
- uint32_t *device_state, bool is_write) {
-
- int ret;
-
- assert(migr != NULL);
- assert(device_state != NULL);
-
- if (!is_write) {
- *device_state = migr->info.device_state;
- return 0;
- }
-
- if (*device_state & ~VFIO_DEVICE_STATE_MASK) {
- vfu_log(vfu_ctx, LOG_ERR, "bad device state %#x", *device_state);
- return ERROR_INT(EINVAL);
- }
-
- if (!vfio_migr_state_transition_is_valid(migr->info.device_state,
- *device_state)) {
- vfu_log(vfu_ctx, LOG_ERR, "bad transition from state %s to state %s",
- migr_states[migr->info.device_state].name,
- migr_states[*device_state].name);
- return ERROR_INT(EINVAL);
- }
-
- switch (*device_state) {
+vfu_migr_state_t
+MOCK_DEFINE(migr_state_vfio_to_vfu)(uint32_t device_state)
+{
+ switch (device_state) {
case VFIO_DEVICE_STATE_STOP:
- ret = migr->callbacks.transition(vfu_ctx, VFU_MIGR_STATE_STOP);
- break;
+ return VFU_MIGR_STATE_STOP;
case VFIO_DEVICE_STATE_RUNNING:
- ret = migr->callbacks.transition(vfu_ctx, VFU_MIGR_STATE_RUNNING);
- break;
+ return VFU_MIGR_STATE_RUNNING;
case VFIO_DEVICE_STATE_SAVING:
/*
* FIXME How should the device operate during the stop-and-copy
@@ -147,30 +122,69 @@ handle_device_state(vfu_ctx_t *vfu_ctx, struct migration *migr,
* the migration region? E.g. Access to any other region should be
* failed? This might be a good question to send to LKML.
*/
- ret = migr->callbacks.transition(vfu_ctx,
- VFU_MIGR_STATE_STOP_AND_COPY);
- break;
+ return VFU_MIGR_STATE_STOP_AND_COPY;
case VFIO_DEVICE_STATE_RUNNING | VFIO_DEVICE_STATE_SAVING:
- ret = migr->callbacks.transition(vfu_ctx, VFU_MIGR_STATE_PRE_COPY);
- break;
+ return VFU_MIGR_STATE_PRE_COPY;
case VFIO_DEVICE_STATE_RESUMING:
- ret = migr->callbacks.transition(vfu_ctx, VFU_MIGR_STATE_RESUME);
- break;
- default:
- assert(false);
+ return VFU_MIGR_STATE_RESUME;
}
+ return -1;
+}
- if (ret == 0) {
- migr->info.device_state = *device_state;
- migr_state_transition(migr, VFIO_USER_MIGR_ITER_STATE_INITIAL);
- } else if (ret < 0) {
- vfu_log(vfu_ctx, LOG_ERR, "failed to transition to state %d: %m",
- *device_state);
+/**
+ * Returns 0 on success, -1 on error setting errno.
+ */
+int
+MOCK_DEFINE(state_trans_notify)(vfu_ctx_t *vfu_ctx,
+ int (*fn)(vfu_ctx_t *, vfu_migr_state_t),
+ uint32_t vfio_device_state)
+{
+ /*
+ * We've already checked that device_state is valid by calling
+ * vfio_migr_state_transition_is_valid.
+ */
+ return fn(vfu_ctx, migr_state_vfio_to_vfu(vfio_device_state));
+}
+
+/**
+ * Returns 0 on success, -1 on failure setting errno.
+ */
+ssize_t
+MOCK_DEFINE(migr_trans_to_valid_state)(vfu_ctx_t *vfu_ctx, struct migration *migr,
+ uint32_t device_state, bool notify)
+{
+ if (notify) {
+ int ret = state_trans_notify(vfu_ctx, migr->callbacks.transition,
+ device_state);
+ if (ret != 0) {
+ return ret;
+ }
}
+ migr->info.device_state = device_state;
+ migr_state_transition(migr, VFIO_USER_MIGR_ITER_STATE_INITIAL);
+ return 0;
+}
- return ret;
+/**
+ * Returns 0 on success, -1 on failure setting errno.
+ */
+ssize_t
+MOCK_DEFINE(handle_device_state)(vfu_ctx_t *vfu_ctx, struct migration *migr,
+ uint32_t device_state, bool notify)
+{
+
+ assert(migr != NULL);
+
+ if (!vfio_migr_state_transition_is_valid(migr->info.device_state,
+ device_state)) {
+ return ERROR_INT(EINVAL);
+ }
+ return migr_trans_to_valid_state(vfu_ctx, migr, device_state, notify);
}
+/**
+ * Returns 0 on success, -1 on error setting errno.
+ */
static ssize_t
handle_pending_bytes(vfu_ctx_t *vfu_ctx, struct migration *migr,
uint64_t *pending_bytes, bool is_write)
@@ -223,6 +237,9 @@ handle_pending_bytes(vfu_ctx_t *vfu_ctx, struct migration *migr,
* Make this behavior conditional.
*/
+/**
+ * Returns 0 on success, -1 on error setting errno.
+ */
static ssize_t
handle_data_offset_when_saving(vfu_ctx_t *vfu_ctx, struct migration *migr,
bool is_write)
@@ -240,7 +257,7 @@ handle_data_offset_when_saving(vfu_ctx_t *vfu_ctx, struct migration *migr,
case VFIO_USER_MIGR_ITER_STATE_STARTED:
ret = migr->callbacks.prepare_data(vfu_ctx, &migr->iter.offset,
&migr->iter.size);
- if (ret < 0) {
+ if (ret != 0) {
return ret;
}
/*
@@ -266,6 +283,9 @@ handle_data_offset_when_saving(vfu_ctx_t *vfu_ctx, struct migration *migr,
return 0;
}
+/**
+ * Returns 0 on success, -1 on error setting errno.
+ */
static ssize_t
handle_data_offset(vfu_ctx_t *vfu_ctx, struct migration *migr,
uint64_t *offset, bool is_write)
@@ -290,7 +310,7 @@ handle_data_offset(vfu_ctx_t *vfu_ctx, struct migration *migr,
return ERROR_INT(EINVAL);
}
ret = migr->callbacks.prepare_data(vfu_ctx, offset, NULL);
- if (ret < 0) {
+ if (ret != 0) {
return ret;
}
*offset += migr->data_offset;
@@ -303,6 +323,9 @@ handle_data_offset(vfu_ctx_t *vfu_ctx, struct migration *migr,
return ERROR_INT(EINVAL);
}
+/**
+ * Returns 0 on success, -1 on failure setting errno.
+ */
static ssize_t
handle_data_size_when_saving(vfu_ctx_t *vfu_ctx, struct migration *migr,
bool is_write)
@@ -324,6 +347,9 @@ handle_data_size_when_saving(vfu_ctx_t *vfu_ctx, struct migration *migr,
return 0;
}
+/**
+ * Returns 0 on success, -1 on error setting errno.
+ */
static ssize_t
handle_data_size_when_resuming(vfu_ctx_t *vfu_ctx, struct migration *migr,
uint64_t size, bool is_write)
@@ -331,11 +357,14 @@ handle_data_size_when_resuming(vfu_ctx_t *vfu_ctx, struct migration *migr,
assert(migr != NULL);
if (is_write) {
- return migr->callbacks.data_written(vfu_ctx, size);
+ return migr->callbacks.data_written(vfu_ctx, size);
}
return 0;
}
+/**
+ * Returns 0 on success, -1 on failure setting errno.
+ */
static ssize_t
handle_data_size(vfu_ctx_t *vfu_ctx, struct migration *migr,
uint64_t *size, bool is_write)
@@ -361,12 +390,17 @@ handle_data_size(vfu_ctx_t *vfu_ctx, struct migration *migr,
return ERROR_INT(EINVAL);
}
-static ssize_t
-migration_region_access_registers(vfu_ctx_t *vfu_ctx, char *buf, size_t count,
- loff_t pos, bool is_write)
+/**
+ * Returns 0 on success, -1 on failure setting errno.
+ */
+ssize_t
+MOCK_DEFINE(migration_region_access_registers)(vfu_ctx_t *vfu_ctx, char *buf,
+ size_t count, loff_t pos,
+ bool is_write)
{
struct migration *migr = vfu_ctx->migration;
int ret;
+ uint32_t *device_state, old_device_state;
assert(migr != NULL);
@@ -377,7 +411,24 @@ migration_region_access_registers(vfu_ctx_t *vfu_ctx, char *buf, size_t count,
"bad device_state access size %ld", count);
return ERROR_INT(EINVAL);
}
- ret = handle_device_state(vfu_ctx, migr, (uint32_t *)buf, is_write);
+ device_state = (uint32_t *)buf;
+ if (!is_write) {
+ *device_state = migr->info.device_state;
+ return 0;
+ }
+ old_device_state = migr->info.device_state;
+ ret = handle_device_state(vfu_ctx, migr, *device_state , true);
+ if (ret == 0) {
+ vfu_log(vfu_ctx, LOG_DEBUG,
+ "migration: transition from state %s to state %s",
+ migr_states[old_device_state].name,
+ migr_states[*device_state].name);
+ } else {
+ vfu_log(vfu_ctx, LOG_ERR,
+ "migration: failed to transition from state %s to state %s",
+ migr_states[old_device_state].name,
+ migr_states[*device_state].name);
+ }
break;
case offsetof(struct vfio_device_migration_info, pending_bytes):
if (count != sizeof(migr->info.pending_bytes)) {
@@ -430,6 +481,9 @@ migration_region_access(vfu_ctx_t *vfu_ctx, char *buf, size_t count,
if (pos + count <= sizeof(struct vfio_device_migration_info)) {
ret = migration_region_access_registers(vfu_ctx, buf, count,
pos, is_write);
+ if (ret != 0) {
+ return ret;
+ }
} else {
if (pos < (loff_t)migr->data_offset) {
diff --git a/lib/migration.h b/lib/migration.h
index aeaca66..ccb98aa 100644
--- a/lib/migration.h
+++ b/lib/migration.h
@@ -65,8 +65,11 @@ migration_get_pgsize(struct migration *migr);
int
migration_set_pgsize(struct migration *migr, size_t pgsize);
-bool
-vfio_migr_state_transition_is_valid(uint32_t from, uint32_t to);
+MOCK_DECLARE(bool, vfio_migr_state_transition_is_valid, uint32_t from,
+ uint32_t to);
+
+MOCK_DECLARE(ssize_t, handle_device_state, vfu_ctx_t *vfu_ctx,
+ struct migration *migr, uint32_t device_state, bool notify);
#endif /* LIB_VFIO_USER_MIGRATION_H */
diff --git a/lib/migration_priv.h b/lib/migration_priv.h
index bf47621..1a0496f 100644
--- a/lib/migration_priv.h
+++ b/lib/migration_priv.h
@@ -75,7 +75,9 @@ struct migr_state_data {
/* valid migration state transitions */
static const struct migr_state_data migr_states[(VFIO_DEVICE_STATE_MASK + 1)] = {
[VFIO_DEVICE_STATE_STOP] = {
- .state = 1 << VFIO_DEVICE_STATE_STOP,
+ .state =
+ (1 << VFIO_DEVICE_STATE_STOP) |
+ (1 << VFIO_DEVICE_STATE_RUNNING),
.name = "stopped"
},
[VFIO_DEVICE_STATE_RUNNING] = {
@@ -91,6 +93,7 @@ static const struct migr_state_data migr_states[(VFIO_DEVICE_STATE_MASK + 1)] =
[VFIO_DEVICE_STATE_SAVING] = {
.state =
(1 << VFIO_DEVICE_STATE_STOP) |
+ (1 << VFIO_DEVICE_STATE_RUNNING) |
(1 << VFIO_DEVICE_STATE_SAVING) |
(1 << VFIO_DEVICE_STATE_ERROR),
.name = "stop-and-copy"
@@ -112,6 +115,21 @@ static const struct migr_state_data migr_states[(VFIO_DEVICE_STATE_MASK + 1)] =
}
};
+MOCK_DECLARE(ssize_t, migration_region_access_registers, vfu_ctx_t *vfu_ctx,
+ char *buf, size_t count, loff_t pos, bool is_write);
+
+MOCK_DECLARE(void, migr_state_transition, struct migration *migr,
+ enum migr_iter_state state);
+
+MOCK_DECLARE(vfu_migr_state_t, migr_state_vfio_to_vfu, uint32_t device_state);
+
+MOCK_DECLARE(int, state_trans_notify, vfu_ctx_t *vfu_ctx,
+ int (*fn)(vfu_ctx_t *, vfu_migr_state_t),
+ uint32_t vfio_device_state);
+
+MOCK_DECLARE(ssize_t, migr_trans_to_valid_state, vfu_ctx_t *vfu_ctx,
+ struct migration *migr, uint32_t device_state, bool notify);
+
#endif
/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/lib/private.h b/lib/private.h
index 97b2469..2d84fb8 100644
--- a/lib/private.h
+++ b/lib/private.h
@@ -192,6 +192,13 @@ MOCK_DECLARE(int, exec_command, vfu_ctx_t *vfu_ctx, vfu_msg_t *msg);
MOCK_DECLARE(int, process_request, vfu_ctx_t *vfu_ctx);
+MOCK_DECLARE(int, handle_dirty_pages_get, vfu_ctx_t *vfu_ctx,
+ struct iovec **iovecs, size_t *nr_iovecs,
+ struct vfio_user_bitmap_range *ranges, uint32_t size);
+
+int
+handle_device_reset(vfu_ctx_t *vfu_ctx, vfu_reset_type_t reason);
+
#endif /* LIB_VFIO_USER_PRIVATE_H */
/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */