diff options
author | Thanos Makatos <thanos.makatos@nutanix.com> | 2021-05-20 17:10:51 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-05-20 17:10:51 +0100 |
commit | cfe9901919943f14961e1da1c4a823336ff79555 (patch) | |
tree | 2b5d8915bbbea8d239684e3334c9926fb910fc2c /lib | |
parent | 947941de95bf2c3f723b37151d67fb129fd01841 (diff) | |
download | libvfio-user-cfe9901919943f14961e1da1c4a823336ff79555.zip libvfio-user-cfe9901919943f14961e1da1c4a823336ff79555.tar.gz libvfio-user-cfe9901919943f14961e1da1c4a823336ff79555.tar.bz2 |
migration: various dirty page tracking fixes (#457)
- document how to use a vfio-user device with libvirt
- document how to use SPDK's nvmf/vfio-user target with libvirt
- replace vfio_bitmap with vfio_user_bitmap and vfio_iommu_type1_dirty_bitmap_get with vfio_user_bitmap_range
- fix bug for calculating number of pages needed for dirty page bitmap
- align number of bytes for dirty page bitmap to QWORD
- add debug messages around dirty page tracking
- only support flags=0 when doing DMA unmap
- set device state to running after reset
- allow region read/write even if device is in stopped state
- allow transitioning from stopped/stop-and-copy state to running state
- fix unit tests
Signed-off-by: Thanos Makatos <thanos.makatos@nutanix.com>
Reviewed-by: John Levon <john.levon@nutanix.com>
Diffstat (limited to 'lib')
-rw-r--r-- | lib/dma.c | 6 | ||||
-rw-r--r-- | lib/libvfio-user.c | 63 | ||||
-rw-r--r-- | lib/migration.c | 170 | ||||
-rw-r--r-- | lib/migration.h | 7 | ||||
-rw-r--r-- | lib/migration_priv.h | 20 | ||||
-rw-r--r-- | lib/private.h | 7 |
6 files changed, 190 insertions, 83 deletions
@@ -423,7 +423,7 @@ get_bitmap_size(size_t region_size, size_t pgsize) return ERROR_INT(EINVAL); } size_t nr_pages = (region_size / pgsize) + (region_size % pgsize != 0); - return (nr_pages / CHAR_BIT) + (nr_pages % CHAR_BIT != 0); + return ROUND_UP(nr_pages, sizeof(uint64_t) * CHAR_BIT) / CHAR_BIT; } int dma_controller_dirty_page_logging_start(dma_controller_t *dma, size_t pgsize) @@ -511,11 +511,13 @@ dma_controller_dirty_page_get(dma_controller_t *dma, vfu_dma_addr_t addr, } if (pgsize != dma->dirty_pgsize) { + vfu_log(dma->vfu_ctx, LOG_ERR, "bad page size %ld", pgsize); return ERROR_INT(EINVAL); } bitmap_size = get_bitmap_size(len, pgsize); if (bitmap_size < 0) { + vfu_log(dma->vfu_ctx, LOG_ERR, "failed to get bitmap size"); return bitmap_size; } @@ -524,6 +526,8 @@ dma_controller_dirty_page_get(dma_controller_t *dma, vfu_dma_addr_t addr, * expects to receive. */ if (size != (size_t)bitmap_size) { + vfu_log(dma->vfu_ctx, LOG_ERR, "bad bitmap size %ld != %ld", size, + bitmap_size); return ERROR_INT(EINVAL); } diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c index 7518a46..c4f6c42 100644 --- a/lib/libvfio-user.c +++ b/lib/libvfio-user.c @@ -543,6 +543,11 @@ handle_dma_map_or_unmap(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) ret = 0; } else { + + if (region->flags != 0) { + vfu_log(vfu_ctx, LOG_ERR, "bad flags=%#x", region->flags); + return ERROR_INT(ENOTSUP); + } ret = dma_controller_remove_region(vfu_ctx->dma, (void *)region->addr, region->size, @@ -560,20 +565,34 @@ handle_dma_map_or_unmap(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) } static int -handle_device_reset(vfu_ctx_t *vfu_ctx) +do_device_reset(vfu_ctx_t *vfu_ctx, vfu_reset_type_t reason) { - vfu_log(vfu_ctx, LOG_DEBUG, "Device reset called by client"); + int ret; + if (vfu_ctx->reset != NULL) { - return vfu_ctx->reset(vfu_ctx, VFU_RESET_DEVICE); + ret = vfu_ctx->reset(vfu_ctx, reason); + if (ret < 0) { + return ret; + } + } + if (vfu_ctx->migration != NULL) { + return handle_device_state(vfu_ctx, vfu_ctx->migration, + VFIO_DEVICE_STATE_RUNNING, false); } return 0; } -static int -handle_dirty_pages_get(vfu_ctx_t *vfu_ctx, - struct iovec **iovecs, size_t *nr_iovecs, - struct vfio_iommu_type1_dirty_bitmap_get *ranges, - uint32_t size) +int +handle_device_reset(vfu_ctx_t *vfu_ctx, vfu_reset_type_t reason) +{ + return do_device_reset(vfu_ctx, reason); +} + +int +MOCK_DEFINE(handle_dirty_pages_get)(vfu_ctx_t *vfu_ctx, + struct iovec **iovecs, size_t *nr_iovecs, + struct vfio_user_bitmap_range *ranges, + uint32_t size) { int ret = EINVAL; size_t i; @@ -583,23 +602,25 @@ handle_dirty_pages_get(vfu_ctx_t *vfu_ctx, assert(nr_iovecs != NULL); assert(ranges != NULL); - if (size % sizeof(struct vfio_iommu_type1_dirty_bitmap_get) != 0) { + if (size % sizeof(struct vfio_user_bitmap_range) != 0) { return ERROR_INT(EINVAL); } - *nr_iovecs = size / sizeof(struct vfio_iommu_type1_dirty_bitmap_get); + *nr_iovecs = size / sizeof(struct vfio_user_bitmap_range); *iovecs = malloc(*nr_iovecs * sizeof(struct iovec)); if (*iovecs == NULL) { return -1; } for (i = 0; i < *nr_iovecs; i++) { - struct vfio_iommu_type1_dirty_bitmap_get *r = &ranges[i]; + struct vfio_user_bitmap_range *r = &ranges[i]; ret = dma_controller_dirty_page_get(vfu_ctx->dma, (vfu_dma_addr_t)r->iova, r->size, r->bitmap.pgsize, r->bitmap.size, (char **)&((*iovecs)[i].iov_base)); if (ret != 0) { ret = errno; + vfu_log(vfu_ctx, LOG_WARNING, + "failed to get dirty bitmap from DMA controller: %m"); goto out; } (*iovecs)[i].iov_len = r->bitmap.size; @@ -644,7 +665,7 @@ MOCK_DEFINE(handle_dirty_pages)(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) break; case VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP: { - struct vfio_iommu_type1_dirty_bitmap_get *get; + struct vfio_user_bitmap_range *get; get = (void *)(dirty_bitmap + 1); ret = handle_dirty_pages_get(vfu_ctx, &msg->out_iovecs, @@ -829,11 +850,12 @@ MOCK_DEFINE(should_exec_command)(vfu_ctx_t *vfu_ctx, uint16_t cmd) "bad command %d while device in stop-and-copy state", cmd); return false; } - } else if (device_is_stopped(vfu_ctx->migration) && - cmd != VFIO_USER_DIRTY_PAGES) { - vfu_log(vfu_ctx, LOG_ERR, - "bad command %d while device in stopped state", cmd); - return false; + } else if (device_is_stopped(vfu_ctx->migration)) { + if (!cmd_allowed_when_stopped_and_copying(cmd)) { + vfu_log(vfu_ctx, LOG_ERR, + "bad command %d while device in stopped state", cmd); + return false; + } } return true; } @@ -873,7 +895,8 @@ MOCK_DEFINE(exec_command)(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) break; case VFIO_USER_DEVICE_RESET: - ret = handle_device_reset(vfu_ctx); + vfu_log(vfu_ctx, LOG_INFO, "device reset by client"); + ret = handle_device_reset(vfu_ctx, VFU_RESET_DEVICE); break; case VFIO_USER_DIRTY_PAGES: @@ -1091,9 +1114,7 @@ vfu_reset_ctx(vfu_ctx_t *vfu_ctx, const char *reason) vfu_ctx); } - if (vfu_ctx->reset != NULL) { - vfu_ctx->reset(vfu_ctx, VFU_RESET_LOST_CONN); - } + do_device_reset(vfu_ctx, VFU_RESET_LOST_CONN); if (vfu_ctx->irqs != NULL) { irqs_reset(vfu_ctx); diff --git a/lib/migration.c b/lib/migration.c index 7294403..8f0706a 100644 --- a/lib/migration.c +++ b/lib/migration.c @@ -40,7 +40,7 @@ #include "migration_priv.h" bool -vfio_migr_state_transition_is_valid(uint32_t from, uint32_t to) +MOCK_DEFINE(vfio_migr_state_transition_is_valid)(uint32_t from, uint32_t to) { return migr_states[from].state & (1 << to); } @@ -98,48 +98,23 @@ init_migration(const vfu_migration_callbacks_t * callbacks, return migr; } -static void -migr_state_transition(struct migration *migr, enum migr_iter_state state) +void +MOCK_DEFINE(migr_state_transition)(struct migration *migr, + enum migr_iter_state state) { assert(migr != NULL); /* FIXME validate that state transition */ migr->iter.state = state; } -static ssize_t -handle_device_state(vfu_ctx_t *vfu_ctx, struct migration *migr, - uint32_t *device_state, bool is_write) { - - int ret; - - assert(migr != NULL); - assert(device_state != NULL); - - if (!is_write) { - *device_state = migr->info.device_state; - return 0; - } - - if (*device_state & ~VFIO_DEVICE_STATE_MASK) { - vfu_log(vfu_ctx, LOG_ERR, "bad device state %#x", *device_state); - return ERROR_INT(EINVAL); - } - - if (!vfio_migr_state_transition_is_valid(migr->info.device_state, - *device_state)) { - vfu_log(vfu_ctx, LOG_ERR, "bad transition from state %s to state %s", - migr_states[migr->info.device_state].name, - migr_states[*device_state].name); - return ERROR_INT(EINVAL); - } - - switch (*device_state) { +vfu_migr_state_t +MOCK_DEFINE(migr_state_vfio_to_vfu)(uint32_t device_state) +{ + switch (device_state) { case VFIO_DEVICE_STATE_STOP: - ret = migr->callbacks.transition(vfu_ctx, VFU_MIGR_STATE_STOP); - break; + return VFU_MIGR_STATE_STOP; case VFIO_DEVICE_STATE_RUNNING: - ret = migr->callbacks.transition(vfu_ctx, VFU_MIGR_STATE_RUNNING); - break; + return VFU_MIGR_STATE_RUNNING; case VFIO_DEVICE_STATE_SAVING: /* * FIXME How should the device operate during the stop-and-copy @@ -147,30 +122,69 @@ handle_device_state(vfu_ctx_t *vfu_ctx, struct migration *migr, * the migration region? E.g. Access to any other region should be * failed? This might be a good question to send to LKML. */ - ret = migr->callbacks.transition(vfu_ctx, - VFU_MIGR_STATE_STOP_AND_COPY); - break; + return VFU_MIGR_STATE_STOP_AND_COPY; case VFIO_DEVICE_STATE_RUNNING | VFIO_DEVICE_STATE_SAVING: - ret = migr->callbacks.transition(vfu_ctx, VFU_MIGR_STATE_PRE_COPY); - break; + return VFU_MIGR_STATE_PRE_COPY; case VFIO_DEVICE_STATE_RESUMING: - ret = migr->callbacks.transition(vfu_ctx, VFU_MIGR_STATE_RESUME); - break; - default: - assert(false); + return VFU_MIGR_STATE_RESUME; } + return -1; +} - if (ret == 0) { - migr->info.device_state = *device_state; - migr_state_transition(migr, VFIO_USER_MIGR_ITER_STATE_INITIAL); - } else if (ret < 0) { - vfu_log(vfu_ctx, LOG_ERR, "failed to transition to state %d: %m", - *device_state); +/** + * Returns 0 on success, -1 on error setting errno. + */ +int +MOCK_DEFINE(state_trans_notify)(vfu_ctx_t *vfu_ctx, + int (*fn)(vfu_ctx_t *, vfu_migr_state_t), + uint32_t vfio_device_state) +{ + /* + * We've already checked that device_state is valid by calling + * vfio_migr_state_transition_is_valid. + */ + return fn(vfu_ctx, migr_state_vfio_to_vfu(vfio_device_state)); +} + +/** + * Returns 0 on success, -1 on failure setting errno. + */ +ssize_t +MOCK_DEFINE(migr_trans_to_valid_state)(vfu_ctx_t *vfu_ctx, struct migration *migr, + uint32_t device_state, bool notify) +{ + if (notify) { + int ret = state_trans_notify(vfu_ctx, migr->callbacks.transition, + device_state); + if (ret != 0) { + return ret; + } } + migr->info.device_state = device_state; + migr_state_transition(migr, VFIO_USER_MIGR_ITER_STATE_INITIAL); + return 0; +} - return ret; +/** + * Returns 0 on success, -1 on failure setting errno. + */ +ssize_t +MOCK_DEFINE(handle_device_state)(vfu_ctx_t *vfu_ctx, struct migration *migr, + uint32_t device_state, bool notify) +{ + + assert(migr != NULL); + + if (!vfio_migr_state_transition_is_valid(migr->info.device_state, + device_state)) { + return ERROR_INT(EINVAL); + } + return migr_trans_to_valid_state(vfu_ctx, migr, device_state, notify); } +/** + * Returns 0 on success, -1 on error setting errno. + */ static ssize_t handle_pending_bytes(vfu_ctx_t *vfu_ctx, struct migration *migr, uint64_t *pending_bytes, bool is_write) @@ -223,6 +237,9 @@ handle_pending_bytes(vfu_ctx_t *vfu_ctx, struct migration *migr, * Make this behavior conditional. */ +/** + * Returns 0 on success, -1 on error setting errno. + */ static ssize_t handle_data_offset_when_saving(vfu_ctx_t *vfu_ctx, struct migration *migr, bool is_write) @@ -240,7 +257,7 @@ handle_data_offset_when_saving(vfu_ctx_t *vfu_ctx, struct migration *migr, case VFIO_USER_MIGR_ITER_STATE_STARTED: ret = migr->callbacks.prepare_data(vfu_ctx, &migr->iter.offset, &migr->iter.size); - if (ret < 0) { + if (ret != 0) { return ret; } /* @@ -266,6 +283,9 @@ handle_data_offset_when_saving(vfu_ctx_t *vfu_ctx, struct migration *migr, return 0; } +/** + * Returns 0 on success, -1 on error setting errno. + */ static ssize_t handle_data_offset(vfu_ctx_t *vfu_ctx, struct migration *migr, uint64_t *offset, bool is_write) @@ -290,7 +310,7 @@ handle_data_offset(vfu_ctx_t *vfu_ctx, struct migration *migr, return ERROR_INT(EINVAL); } ret = migr->callbacks.prepare_data(vfu_ctx, offset, NULL); - if (ret < 0) { + if (ret != 0) { return ret; } *offset += migr->data_offset; @@ -303,6 +323,9 @@ handle_data_offset(vfu_ctx_t *vfu_ctx, struct migration *migr, return ERROR_INT(EINVAL); } +/** + * Returns 0 on success, -1 on failure setting errno. + */ static ssize_t handle_data_size_when_saving(vfu_ctx_t *vfu_ctx, struct migration *migr, bool is_write) @@ -324,6 +347,9 @@ handle_data_size_when_saving(vfu_ctx_t *vfu_ctx, struct migration *migr, return 0; } +/** + * Returns 0 on success, -1 on error setting errno. + */ static ssize_t handle_data_size_when_resuming(vfu_ctx_t *vfu_ctx, struct migration *migr, uint64_t size, bool is_write) @@ -331,11 +357,14 @@ handle_data_size_when_resuming(vfu_ctx_t *vfu_ctx, struct migration *migr, assert(migr != NULL); if (is_write) { - return migr->callbacks.data_written(vfu_ctx, size); + return migr->callbacks.data_written(vfu_ctx, size); } return 0; } +/** + * Returns 0 on success, -1 on failure setting errno. + */ static ssize_t handle_data_size(vfu_ctx_t *vfu_ctx, struct migration *migr, uint64_t *size, bool is_write) @@ -361,12 +390,17 @@ handle_data_size(vfu_ctx_t *vfu_ctx, struct migration *migr, return ERROR_INT(EINVAL); } -static ssize_t -migration_region_access_registers(vfu_ctx_t *vfu_ctx, char *buf, size_t count, - loff_t pos, bool is_write) +/** + * Returns 0 on success, -1 on failure setting errno. + */ +ssize_t +MOCK_DEFINE(migration_region_access_registers)(vfu_ctx_t *vfu_ctx, char *buf, + size_t count, loff_t pos, + bool is_write) { struct migration *migr = vfu_ctx->migration; int ret; + uint32_t *device_state, old_device_state; assert(migr != NULL); @@ -377,7 +411,24 @@ migration_region_access_registers(vfu_ctx_t *vfu_ctx, char *buf, size_t count, "bad device_state access size %ld", count); return ERROR_INT(EINVAL); } - ret = handle_device_state(vfu_ctx, migr, (uint32_t *)buf, is_write); + device_state = (uint32_t *)buf; + if (!is_write) { + *device_state = migr->info.device_state; + return 0; + } + old_device_state = migr->info.device_state; + ret = handle_device_state(vfu_ctx, migr, *device_state , true); + if (ret == 0) { + vfu_log(vfu_ctx, LOG_DEBUG, + "migration: transition from state %s to state %s", + migr_states[old_device_state].name, + migr_states[*device_state].name); + } else { + vfu_log(vfu_ctx, LOG_ERR, + "migration: failed to transition from state %s to state %s", + migr_states[old_device_state].name, + migr_states[*device_state].name); + } break; case offsetof(struct vfio_device_migration_info, pending_bytes): if (count != sizeof(migr->info.pending_bytes)) { @@ -430,6 +481,9 @@ migration_region_access(vfu_ctx_t *vfu_ctx, char *buf, size_t count, if (pos + count <= sizeof(struct vfio_device_migration_info)) { ret = migration_region_access_registers(vfu_ctx, buf, count, pos, is_write); + if (ret != 0) { + return ret; + } } else { if (pos < (loff_t)migr->data_offset) { diff --git a/lib/migration.h b/lib/migration.h index aeaca66..ccb98aa 100644 --- a/lib/migration.h +++ b/lib/migration.h @@ -65,8 +65,11 @@ migration_get_pgsize(struct migration *migr); int migration_set_pgsize(struct migration *migr, size_t pgsize); -bool -vfio_migr_state_transition_is_valid(uint32_t from, uint32_t to); +MOCK_DECLARE(bool, vfio_migr_state_transition_is_valid, uint32_t from, + uint32_t to); + +MOCK_DECLARE(ssize_t, handle_device_state, vfu_ctx_t *vfu_ctx, + struct migration *migr, uint32_t device_state, bool notify); #endif /* LIB_VFIO_USER_MIGRATION_H */ diff --git a/lib/migration_priv.h b/lib/migration_priv.h index bf47621..1a0496f 100644 --- a/lib/migration_priv.h +++ b/lib/migration_priv.h @@ -75,7 +75,9 @@ struct migr_state_data { /* valid migration state transitions */ static const struct migr_state_data migr_states[(VFIO_DEVICE_STATE_MASK + 1)] = { [VFIO_DEVICE_STATE_STOP] = { - .state = 1 << VFIO_DEVICE_STATE_STOP, + .state = + (1 << VFIO_DEVICE_STATE_STOP) | + (1 << VFIO_DEVICE_STATE_RUNNING), .name = "stopped" }, [VFIO_DEVICE_STATE_RUNNING] = { @@ -91,6 +93,7 @@ static const struct migr_state_data migr_states[(VFIO_DEVICE_STATE_MASK + 1)] = [VFIO_DEVICE_STATE_SAVING] = { .state = (1 << VFIO_DEVICE_STATE_STOP) | + (1 << VFIO_DEVICE_STATE_RUNNING) | (1 << VFIO_DEVICE_STATE_SAVING) | (1 << VFIO_DEVICE_STATE_ERROR), .name = "stop-and-copy" @@ -112,6 +115,21 @@ static const struct migr_state_data migr_states[(VFIO_DEVICE_STATE_MASK + 1)] = } }; +MOCK_DECLARE(ssize_t, migration_region_access_registers, vfu_ctx_t *vfu_ctx, + char *buf, size_t count, loff_t pos, bool is_write); + +MOCK_DECLARE(void, migr_state_transition, struct migration *migr, + enum migr_iter_state state); + +MOCK_DECLARE(vfu_migr_state_t, migr_state_vfio_to_vfu, uint32_t device_state); + +MOCK_DECLARE(int, state_trans_notify, vfu_ctx_t *vfu_ctx, + int (*fn)(vfu_ctx_t *, vfu_migr_state_t), + uint32_t vfio_device_state); + +MOCK_DECLARE(ssize_t, migr_trans_to_valid_state, vfu_ctx_t *vfu_ctx, + struct migration *migr, uint32_t device_state, bool notify); + #endif /* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lib/private.h b/lib/private.h index 97b2469..2d84fb8 100644 --- a/lib/private.h +++ b/lib/private.h @@ -192,6 +192,13 @@ MOCK_DECLARE(int, exec_command, vfu_ctx_t *vfu_ctx, vfu_msg_t *msg); MOCK_DECLARE(int, process_request, vfu_ctx_t *vfu_ctx); +MOCK_DECLARE(int, handle_dirty_pages_get, vfu_ctx_t *vfu_ctx, + struct iovec **iovecs, size_t *nr_iovecs, + struct vfio_user_bitmap_range *ranges, uint32_t size); + +int +handle_device_reset(vfu_ctx_t *vfu_ctx, vfu_reset_type_t reason); + #endif /* LIB_VFIO_USER_PRIVATE_H */ /* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ |