From 2ce5c70f9e0dc1bd159cec9cfa1f7cff7fef3596 Mon Sep 17 00:00:00 2001 From: William Henderson Date: Mon, 10 Jul 2023 10:48:58 +0000 Subject: update samples to use migration v2 Signed-off-by: William Henderson --- samples/client.c | 309 ++++++++++++++++++++++++--------------------- samples/gpio-pci-idio-16.c | 58 ++------- samples/server.c | 236 +++++++++++++++------------------- 3 files changed, 280 insertions(+), 323 deletions(-) diff --git a/samples/client.c b/samples/client.c index d4abd21..121122a 100644 --- a/samples/client.c +++ b/samples/client.c @@ -62,6 +62,8 @@ static char const *irq_to_str[] = { [VFU_DEV_REQ_IRQ] = "REQ" }; +static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + void vfu_log(UNUSED vfu_ctx_t *vfu_ctx, UNUSED int level, const char *fmt, ...) @@ -217,9 +219,7 @@ static bool get_region_vfio_caps(struct vfio_info_cap_header *header, struct vfio_region_info_cap_sparse_mmap **sparse) { - struct vfio_region_info_cap_type *type; unsigned int i; - bool migr = false; while (true) { switch (header->id) { @@ -234,16 +234,6 @@ get_region_vfio_caps(struct vfio_info_cap_header *header, (ull_t)(*sparse)->areas[i].size); } break; - case VFIO_REGION_INFO_CAP_TYPE: - type = (struct vfio_region_info_cap_type*)header; - if (type->type != VFIO_REGION_TYPE_MIGRATION || - type->subtype != VFIO_REGION_SUBTYPE_MIGRATION) { - errx(EXIT_FAILURE, "bad region type %d/%d", type->type, - type->subtype); - } - migr = true; - printf("client: migration region\n"); - break; default: errx(EXIT_FAILURE, "bad VFIO cap ID %#x", header->id); } @@ -252,7 +242,7 @@ get_region_vfio_caps(struct vfio_info_cap_header *header, } header = (struct vfio_info_cap_header*)((char*)header + header->next - sizeof(struct vfio_region_info)); } - return migr; + return false; } static void @@ -347,8 +337,7 @@ get_device_region_info(int sock, uint32_t index) if (get_region_vfio_caps((struct vfio_info_cap_header*)(region_info + 1), &sparse)) { if (sparse != NULL) { - assert((index == VFU_PCI_DEV_BAR1_REGION_IDX && nr_fds == 2) || - (index == VFU_PCI_DEV_MIGR_REGION_IDX && nr_fds == 1)); + assert((index == VFU_PCI_DEV_BAR1_REGION_IDX && nr_fds == 2)); assert(nr_fds == sparse->nr_areas); mmap_sparse_areas(fds, region_info, sparse); } @@ -386,7 +375,7 @@ get_device_info(int sock, struct vfio_user_device_info *dev_info) err(EXIT_FAILURE, "failed to get device info"); } - if (dev_info->num_regions != 10) { + if (dev_info->num_regions != 9) { errx(EXIT_FAILURE, "bad number of device regions %d", dev_info->num_regions); } @@ -471,7 +460,6 @@ access_region(int sock, int region, bool is_write, uint64_t offset, .iov_len = data_len } }; - static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; struct vfio_user_region_access *recv_data; size_t nr_send_iovecs, recv_data_len; int op, ret; @@ -526,6 +514,114 @@ access_region(int sock, int region, bool is_write, uint64_t offset, return 0; } +static int +set_migration_state(int sock, uint32_t state) +{ + static int msg_id = 0xfab1; + struct vfio_user_device_feature req = { + .argsz = 16, + .flags = VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE + }; + struct vfio_user_device_feature_mig_state change_state = { + .device_state = state, + .data_fd = 0 + }; + struct iovec send_iovecs[3] = { + [1] = { + .iov_base = &req, + .iov_len = sizeof(req) + }, + [2] = { + .iov_base = &change_state, + .iov_len = sizeof(change_state) + } + }; + void* response = malloc(sizeof(req) + sizeof(change_state)); + + pthread_mutex_lock(&mutex); + int ret = tran_sock_msg_iovec(sock, msg_id--, VFIO_USER_DEVICE_FEATURE, + send_iovecs, 3, NULL, 0, NULL, + response, sizeof(req) + sizeof(change_state), + NULL, 0); + pthread_mutex_unlock(&mutex); + + if (ret < 0) { + return -1; + } + + assert(memcmp(&req, response, sizeof(req)) == 0); + assert(memcmp(&change_state, response + sizeof(req), + sizeof(change_state)) == 0); + + return ret; +} + +static int +read_migr_data(int sock, void *buf, size_t len) +{ + static int msg_id = 0x6904; + struct vfio_user_mig_data_without_data req = { + .argsz = 12, + .size = len + }; + struct iovec send_iovecs[2] = { + [1] = { + .iov_base = &req, + .iov_len = sizeof(req) + } + }; + struct vfio_user_mig_data_with_data *res = calloc(1, sizeof(req) + len); + + pthread_mutex_lock(&mutex); + int ret = tran_sock_msg_iovec(sock, msg_id--, VFIO_USER_MIG_DATA_READ, + send_iovecs, 2, NULL, 0, NULL, + res, sizeof(req) + len, NULL, 0); + pthread_mutex_unlock(&mutex); + + if (ret < 0) { + free(res); + return -1; + } + + memcpy(buf, res->data, res->size); + + free(res); + + return res->size; +} + +static int +write_migr_data(int sock, void *buf, size_t len) +{ + static int msg_id = 0x2023; + struct vfio_user_mig_data_with_data req = { + .argsz = 12 + len, + .size = len + }; + struct iovec send_iovecs[3] = { + [1] = { + .iov_base = &req, + .iov_len = sizeof(req) + }, + [2] = { + .iov_base = buf, + .iov_len = len + } + }; + + pthread_mutex_lock(&mutex); + int ret = tran_sock_msg_iovec(sock, msg_id--, VFIO_USER_MIG_DATA_WRITE, + send_iovecs, 3, NULL, 0, NULL, + &req, sizeof(req), NULL, 0); + pthread_mutex_unlock(&mutex); + + if (ret < 0) { + return -1; + } + + return ret; +} + static void access_bar0(int sock, time_t *t) { @@ -735,79 +831,37 @@ usage(char *argv0) basename(argv0)); } -/* - * Normally each time the source client (QEMU) would read migration data from - * the device it would send them to the destination client. However, since in - * our sample both the source and the destination client are the same process, - * we simply accumulate the migration data of each iteration and apply it to - * the destination server at the end. - * - * Performs as many migration loops as @nr_iters or until the device has no - * more migration data (pending_bytes is zero), which ever comes first. The - * result of each migration iteration is stored in @migr_iter. @migr_iter must - * be at least @nr_iters. - * - * @returns the number of iterations performed - */ static size_t -do_migrate(int sock, size_t nr_iters, struct iovec *migr_iter) +do_migrate(int sock, size_t max_iters, size_t max_iter_size, + struct iovec *migr_iter) { int ret; - uint64_t pending_bytes, data_offset, data_size; - size_t i = 0; - - assert(nr_iters > 0); - - /* XXX read pending_bytes */ - ret = access_region(sock, VFU_PCI_DEV_MIGR_REGION_IDX, false, - offsetof(struct vfio_user_migration_info, pending_bytes), - &pending_bytes, sizeof(pending_bytes)); - if (ret < 0) { - err(EXIT_FAILURE, "failed to read pending_bytes"); - } - - for (i = 0; i < nr_iters && pending_bytes > 0; i++) { + size_t i; + bool is_more = true; - /* XXX read data_offset and data_size */ - ret = access_region(sock, VFU_PCI_DEV_MIGR_REGION_IDX, false, - offsetof(struct vfio_user_migration_info, data_offset), - &data_offset, sizeof(data_offset)); - if (ret < 0) { - err(EXIT_FAILURE, "failed to read data_offset"); - } + for (i = 0; i < max_iters && is_more; i++) { - ret = access_region(sock, VFU_PCI_DEV_MIGR_REGION_IDX, false, - offsetof(struct vfio_user_migration_info, data_size), - &data_size, sizeof(data_size)); - if (ret < 0) { - err(EXIT_FAILURE, "failed to read data_size"); - } + migr_iter[i].iov_len = max_iter_size; + migr_iter[i].iov_base = malloc(migr_iter[i].iov_len); - migr_iter[i].iov_len = data_size; - migr_iter[i].iov_base = malloc(data_size); if (migr_iter[i].iov_base == NULL) { err(EXIT_FAILURE, "failed to allocate migration buffer"); } /* XXX read migration data */ - ret = access_region(sock, VFU_PCI_DEV_MIGR_REGION_IDX, false, - data_offset, - (char *)migr_iter[i].iov_base, data_size); + ret = read_migr_data(sock, migr_iter[i].iov_base, migr_iter[i].iov_len); if (ret < 0) { err(EXIT_FAILURE, "failed to read migration data"); } - /* FIXME send migration data to the destination client process */ - - /* - * XXX read pending_bytes again to indicate to the server that the - * migration data have been consumed. - */ - ret = access_region(sock, VFU_PCI_DEV_MIGR_REGION_IDX, false, - offsetof(struct vfio_user_migration_info, pending_bytes), - &pending_bytes, sizeof(pending_bytes)); - if (ret < 0) { - err(EXIT_FAILURE, "failed to read pending_bytes"); + if (ret < (int)migr_iter[i].iov_len) { + // FIXME is it pointless shuffling stuff around? + void* buf = malloc(ret); + memcpy(buf, migr_iter[i].iov_base, ret); + free(migr_iter[i].iov_base); + migr_iter[i].iov_base = buf; + migr_iter[i].iov_len = ret; + is_more = false; } } return i; @@ -844,7 +898,7 @@ fake_guest(void *arg) if (ret != 0) { err(EXIT_FAILURE, "fake guest failed to write garbage to BAR1"); } - crc = rte_hash_crc(buf, fake_guest_data->bar1_size, crc); + crc = rte_hash_crc(buf, fake_guest_data->bar1_size, 0); __sync_synchronize(); } while (!fake_guest_data->done); @@ -859,7 +913,6 @@ migrate_from(int sock, size_t *nr_iters, struct iovec **migr_iters, { uint32_t device_state; int ret; - size_t _nr_iters; pthread_t thread; struct fake_guest_data fake_guest_data = { .sock = sock, @@ -868,13 +921,15 @@ migrate_from(int sock, size_t *nr_iters, struct iovec **migr_iters, .crcp = crcp }; + size_t max_iter_size = 4096; + ret = pthread_create(&thread, NULL, fake_guest, &fake_guest_data); if (ret != 0) { errno = ret; err(EXIT_FAILURE, "failed to create pthread"); } - *nr_iters = 2; + *nr_iters = 8; *migr_iters = malloc(sizeof(struct iovec) * *nr_iters); if (*migr_iters == NULL) { err(EXIT_FAILURE, NULL); @@ -884,16 +939,17 @@ migrate_from(int sock, size_t *nr_iters, struct iovec **migr_iters, * XXX set device state to pre-copy. This is technically optional but any * VMM that cares about performance needs this. */ - device_state = VFIO_DEVICE_STATE_V1_SAVING | VFIO_DEVICE_STATE_V1_RUNNING; - ret = access_region(sock, VFU_PCI_DEV_MIGR_REGION_IDX, true, - offsetof(struct vfio_user_migration_info, device_state), - &device_state, sizeof(device_state)); + device_state = VFIO_DEVICE_STATE_PRE_COPY; + ret = set_migration_state(sock, device_state); if (ret < 0) { err(EXIT_FAILURE, "failed to write to device state"); } - _nr_iters = do_migrate(sock, 1, *migr_iters); - assert(_nr_iters == 1); + ret = do_migrate(sock, *nr_iters, max_iter_size, *migr_iters); + if (ret < 0) { + err(EXIT_FAILURE, "failed to do migration in pre-copy state"); + } + printf("client: stopping fake guest thread\n"); fake_guest_data.done = true; __sync_synchronize(); @@ -905,31 +961,25 @@ migrate_from(int sock, size_t *nr_iters, struct iovec **migr_iters, printf("client: setting device state to stop-and-copy\n"); - device_state = VFIO_DEVICE_STATE_V1_SAVING; - ret = access_region(sock, VFU_PCI_DEV_MIGR_REGION_IDX, true, - offsetof(struct vfio_user_migration_info, device_state), - &device_state, sizeof(device_state)); + device_state = VFIO_DEVICE_STATE_STOP_COPY; + ret = set_migration_state(sock, device_state); if (ret < 0) { err(EXIT_FAILURE, "failed to write to device state"); } - _nr_iters += do_migrate(sock, 1, (*migr_iters) + _nr_iters); - if (_nr_iters != 2) { - errx(EXIT_FAILURE, - "expected 2 iterations instead of %zu while in stop-and-copy state", - _nr_iters); + size_t iters = do_migrate(sock, *nr_iters, max_iter_size, *migr_iters); + if (ret < 0) { + err(EXIT_FAILURE, "failed to do migration in stop-and-copy state"); } /* XXX read device state, migration must have finished now */ - device_state = VFIO_DEVICE_STATE_V1_STOP; - ret = access_region(sock, VFU_PCI_DEV_MIGR_REGION_IDX, true, - offsetof(struct vfio_user_migration_info, device_state), - &device_state, sizeof(device_state)); + device_state = VFIO_DEVICE_STATE_STOP; + ret = set_migration_state(sock, device_state); if (ret < 0) { err(EXIT_FAILURE, "failed to write to device state"); } - return _nr_iters; + return iters; } static int @@ -941,8 +991,7 @@ migrate_to(char *old_sock_path, int *server_max_fds, int ret, sock; char *sock_path; struct stat sb; - uint32_t device_state = VFIO_DEVICE_STATE_V1_RESUMING; - uint64_t data_offset, data_len; + uint32_t device_state = VFIO_DEVICE_STATE_RESUMING; size_t i; uint32_t dst_crc; char buf[bar1_size]; @@ -960,9 +1009,10 @@ migrate_to(char *old_sock_path, int *server_max_fds, if (ret == -1) { err(EXIT_FAILURE, "failed to fork"); } - if (ret > 0) { /* child (destination server) */ + if (ret == 0) { /* child (destination server) */ char *_argv[] = { path_to_server, + (char *)"-r", // start in VFIO_DEVICE_STATE_RESUMING (char *)"-v", sock_path, NULL @@ -992,57 +1042,23 @@ migrate_to(char *old_sock_path, int *server_max_fds, negotiate(sock, server_max_fds, server_max_data_xfer_size, pgsize); - /* XXX set device state to resuming */ - ret = access_region(sock, VFU_PCI_DEV_MIGR_REGION_IDX, true, - offsetof(struct vfio_user_migration_info, device_state), - &device_state, sizeof(device_state)); - if (ret < 0) { - err(EXIT_FAILURE, "failed to set device state to resuming"); - } - for (i = 0; i < nr_iters; i++) { - /* XXX read data offset */ - ret = access_region(sock, VFU_PCI_DEV_MIGR_REGION_IDX, false, - offsetof(struct vfio_user_migration_info, data_offset), - &data_offset, sizeof(data_offset)); - if (ret < 0) { - err(EXIT_FAILURE, "failed to read migration data offset"); - } - /* XXX write migration data */ - /* - * TODO write half of migration data via regular write and other half via - * memopy map. - */ - printf("client: writing migration device data %#llx-%#llx\n", - (ull_t)data_offset, - (ull_t)(data_offset + migr_iters[i].iov_len - 1)); - ret = access_region(sock, VFU_PCI_DEV_MIGR_REGION_IDX, true, - data_offset, migr_iters[i].iov_base, - migr_iters[i].iov_len); + printf("client: writing migration device data iter %zu\n", i); + ret = write_migr_data(sock, migr_iters[i].iov_base, + migr_iters[i].iov_len); if (ret < 0) { err(EXIT_FAILURE, "failed to write device migration data"); } - - /* XXX write data_size */ - data_len = migr_iters[i].iov_len; - ret = access_region(sock, VFU_PCI_DEV_MIGR_REGION_IDX, true, - offsetof(struct vfio_user_migration_info, data_size), - &data_len, sizeof(data_len)); - if (ret < 0) { - err(EXIT_FAILURE, "failed to write migration data size"); - } } - /* XXX set device state to running */ - device_state = VFIO_DEVICE_STATE_V1_RUNNING; - ret = access_region(sock, VFU_PCI_DEV_MIGR_REGION_IDX, true, - offsetof(struct vfio_user_migration_info, device_state), - &device_state, sizeof(device_state)); + /* XXX set device state to stop to finish the transfer */ + device_state = VFIO_DEVICE_STATE_STOP; + ret = set_migration_state(sock, device_state); if (ret < 0) { - err(EXIT_FAILURE, "failed to set device state to running"); + err(EXIT_FAILURE, "failed to set device state to stop"); } /* validate contents of BAR1 */ @@ -1056,6 +1072,15 @@ migrate_to(char *old_sock_path, int *server_max_fds, if (dst_crc != src_crc) { fprintf(stderr, "client: CRC mismatch: %u != %u\n", src_crc, dst_crc); abort(); + } else { + fprintf(stdout, "client: CRC match, we did it! :)\n"); + } + + /* XXX set device state to running */ + device_state = VFIO_DEVICE_STATE_RUNNING; + ret = set_migration_state(sock, device_state); + if (ret < 0) { + err(EXIT_FAILURE, "failed to set device state to running"); } return sock; diff --git a/samples/gpio-pci-idio-16.c b/samples/gpio-pci-idio-16.c index b50f407..b323249 100644 --- a/samples/gpio-pci-idio-16.c +++ b/samples/gpio-pci-idio-16.c @@ -77,49 +77,23 @@ migration_device_state_transition(vfu_ctx_t *vfu_ctx, vfu_migr_state_t state) return 0; } -static uint64_t -migration_get_pending_bytes(UNUSED vfu_ctx_t *vfu_ctx) -{ - if (dirty) { - return sizeof(pin); - } - return 0; -} - -static int -migration_prepare_data(UNUSED vfu_ctx_t *vfu_ctx, - uint64_t *offset, uint64_t *size) -{ - *offset = 0; - if (size != NULL) { /* null means resuming */ - *size = sizeof(pin); - } - return 0; -} - static ssize_t -migration_read_data(UNUSED vfu_ctx_t *vfu_ctx, void *buf, - uint64_t size, uint64_t offset) +migration_read_data(UNUSED vfu_ctx_t *vfu_ctx, void *buf, uint64_t size) { - assert(offset == 0); assert(size == sizeof(pin)); - memcpy(buf, &pin, sizeof(pin)); - dirty = false; - return 0; -} -static int -migration_data_written(UNUSED vfu_ctx_t *vfu_ctx, uint64_t count) -{ - assert(count == sizeof(pin)); - return 0; + if (dirty) { + memcpy(buf, &pin, sizeof(pin)); + dirty = false; + return sizeof(pin); + } else { + return 0; + } } static ssize_t -migration_write_data(UNUSED vfu_ctx_t *vfu_ctx, void *buf, - uint64_t size, uint64_t offset) +migration_write_data(UNUSED vfu_ctx_t *vfu_ctx, void *buf, uint64_t size) { - assert(offset == 0); assert(size == sizeof(pin)); memcpy(&pin, buf, sizeof(pin)); return 0; @@ -145,16 +119,10 @@ main(int argc, char *argv[]) int opt; struct sigaction act = { .sa_handler = _sa_handler }; vfu_ctx_t *vfu_ctx; - size_t migr_regs_size = vfu_get_migr_register_area_size(); - size_t migr_data_size = sysconf(_SC_PAGE_SIZE); - size_t migr_size = migr_regs_size + migr_data_size; const vfu_migration_callbacks_t migr_callbacks = { .version = VFU_MIGR_CALLBACKS_VERS, .transition = &migration_device_state_transition, - .get_pending_bytes = &migration_get_pending_bytes, - .prepare_data = &migration_prepare_data, .read_data = &migration_read_data, - .data_written = &migration_data_written, .write_data = &migration_write_data }; @@ -214,13 +182,7 @@ main(int argc, char *argv[]) } if (enable_migr) { - ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_MIGR_REGION_IDX, migr_size, - NULL, VFU_REGION_FLAG_RW, NULL, 0, -1, 0); - if (ret < 0) { - err(EXIT_FAILURE, "failed to setup migration region"); - } - ret = vfu_setup_device_migration_callbacks(vfu_ctx, &migr_callbacks, - migr_regs_size); + ret = vfu_setup_device_migration_callbacks(vfu_ctx, 0, &migr_callbacks); if (ret < 0) { err(EXIT_FAILURE, "failed to setup device migration"); } diff --git a/samples/server.c b/samples/server.c index 37c5d78..2b93771 100644 --- a/samples/server.c +++ b/samples/server.c @@ -62,7 +62,8 @@ struct server_data { size_t bar1_size; struct dma_regions regions[NR_DMA_REGIONS]; struct { - uint64_t pending_bytes; + uint64_t pending_read; + uint64_t pending_write; vfu_migr_state_t state; } migration; }; @@ -134,7 +135,7 @@ bar1_access(vfu_ctx_t *vfu_ctx, char * const buf, if (is_write) { if (server_data->migration.state == VFU_MIGR_STATE_PRE_COPY) { /* dirty the whole thing */ - server_data->migration.pending_bytes = server_data->bar1_size; + server_data->migration.pending_read = server_data->bar1_size; } memcpy(server_data->bar1 + offset, buf, count); } else { @@ -274,19 +275,20 @@ migration_device_state_transition(vfu_ctx_t *vfu_ctx, vfu_migr_state_t state) if (setitimer(ITIMER_REAL, &new, NULL) != 0) { err(EXIT_FAILURE, "failed to disable timer"); } - server_data->migration.pending_bytes = server_data->bar1_size + sizeof(time_t); /* FIXME BAR0 region size */ + server_data->migration.pending_read = server_data->bar1_size + sizeof(time_t); /* FIXME BAR0 region size */ break; case VFU_MIGR_STATE_PRE_COPY: - /* TODO must be less than size of data region in migration region */ - server_data->migration.pending_bytes = server_data->bar1_size; + server_data->migration.pending_read = server_data->bar1_size; break; case VFU_MIGR_STATE_STOP: /* FIXME should gracefully fail */ - assert(server_data->migration.pending_bytes == 0); + assert(server_data->migration.pending_read == 0); break; case VFU_MIGR_STATE_RESUME: + server_data->migration.pending_write = server_data->bar1_size + sizeof(time_t); break; case VFU_MIGR_STATE_RUNNING: + assert(server_data->migration.pending_write == 0); ret = arm_timer(vfu_ctx, server_data->bar0); if (ret < 0) { return ret; @@ -299,125 +301,119 @@ migration_device_state_transition(vfu_ctx_t *vfu_ctx, vfu_migr_state_t state) return 0; } -static uint64_t -migration_get_pending_bytes(vfu_ctx_t *vfu_ctx) -{ - struct server_data *server_data = vfu_get_private(vfu_ctx); - return server_data->migration.pending_bytes; -} - -static int -migration_prepare_data(vfu_ctx_t *vfu_ctx, uint64_t *offset, uint64_t *size) -{ - struct server_data *server_data = vfu_get_private(vfu_ctx); - - *offset = 0; - if (size != NULL) { - *size = server_data->migration.pending_bytes; - } - return 0; -} - static ssize_t -migration_read_data(vfu_ctx_t *vfu_ctx, void *buf, - uint64_t size, uint64_t offset) +migration_read_data(vfu_ctx_t *vfu_ctx, void *buf, uint64_t size) { struct server_data *server_data = vfu_get_private(vfu_ctx); - if (server_data->migration.state != VFU_MIGR_STATE_PRE_COPY && - server_data->migration.state != VFU_MIGR_STATE_STOP_AND_COPY) - { - return size; - } - /* - * For ease of implementation we expect the client to read all migration - * data in one go; partial reads are not supported. This is allowed by VFIO - * however we don't yet support it. Similarly, when resuming, partial - * writes are supported by VFIO, however we don't in this sample. - * * If in pre-copy state we copy BAR1, if in stop-and-copy state we copy * both BAR1 and BAR0. Since we always copy BAR1 in the stop-and-copy state, * copying BAR1 in the pre-copy state is pointless. Fixing this requires * more complex state tracking which exceeds the scope of this sample. */ - if (offset != 0 || size != server_data->migration.pending_bytes) { - errno = EINVAL; - return -1; + if (server_data->migration.pending_read == 0 || size == 0) { + return 0; } - memcpy(buf, server_data->bar1, server_data->bar1_size); + uint32_t total_read = server_data->bar1_size; + if (server_data->migration.state == VFU_MIGR_STATE_STOP_AND_COPY) { - memcpy(buf + server_data->bar1_size, &server_data->bar0, - sizeof(server_data->bar0)); + total_read += sizeof(server_data->bar0); + } + + uint32_t read_start = total_read - server_data->migration.pending_read; + uint32_t read_end = MIN(read_start + size, total_read); // exclusive + assert(read_end > read_start); + + uint32_t bytes_read = read_end - read_start; + + if (read_end <= server_data->bar1_size) { + // case 1: entire read lies within bar1 + // TODO check the following is always allowed + + memcpy(buf, server_data->bar1 + read_start, bytes_read); + } else if ( + read_start < server_data->bar1_size // starts in bar1 + && read_end > server_data->bar1_size // ends in bar0 + ) { + // case 2: part of the read in bar1 and part of the read in bar0 + // TODO check the following is always allowed + + uint32_t length_in_bar1 = server_data->bar1_size - read_start; + uint32_t length_in_bar0 = read_end - server_data->bar1_size; + assert(length_in_bar1 + length_in_bar0 == bytes_read); + + memcpy(buf, server_data->bar1 + read_start, length_in_bar1); + memcpy(buf + length_in_bar1, &server_data->bar0, length_in_bar0); + } else if (read_start >= server_data->bar1_size) { + // case 3: entire read lies within bar0 + // TODO check the following is always allowed + + read_start -= server_data->bar1_size; + read_end -= server_data->bar1_size; + + memcpy(buf, &server_data->bar0 + read_start, bytes_read); } - server_data->migration.pending_bytes = 0; - return size; + server_data->migration.pending_read -= bytes_read; + + return bytes_read; } static ssize_t -migration_write_data(vfu_ctx_t *vfu_ctx, void *data, - uint64_t size, uint64_t offset) +migration_write_data(vfu_ctx_t *vfu_ctx, void *data, uint64_t size) { struct server_data *server_data = vfu_get_private(vfu_ctx); char *buf = data; - int ret; assert(server_data != NULL); assert(data != NULL); - if (offset != 0 || size < server_data->bar1_size) { - vfu_log(vfu_ctx, LOG_DEBUG, "XXX bad migration data write %#llx-%#llx", - (unsigned long long)offset, - (unsigned long long)offset + size - 1); - errno = EINVAL; - return -1; - } - - memcpy(server_data->bar1, buf, server_data->bar1_size); - buf += server_data->bar1_size; - size -= server_data->bar1_size; - if (size == 0) { + if (server_data->migration.pending_write == 0 || size == 0) { return 0; } - if (size != sizeof(server_data->bar0)) { - errno = EINVAL; - return -1; - } - memcpy(&server_data->bar0, buf, sizeof(server_data->bar0)); - ret = bar0_access(vfu_ctx, buf, sizeof(server_data->bar0), 0, true); - assert(ret == (int)size); /* FIXME */ - return 0; -} + uint32_t total_write = server_data->bar1_size + sizeof(server_data->bar0); + uint32_t write_start = total_write - server_data->migration.pending_write; + uint32_t write_end = MIN(write_start + size, total_write); // exclusive + assert(write_end > write_start); -static int -migration_data_written(UNUSED vfu_ctx_t *vfu_ctx, UNUSED uint64_t count) -{ - /* - * We apply migration state directly in the migration_write_data callback, - * so we don't need to do anything here. We would have to apply migration - * state in this callback if the migration region was memory mappable, in - * which case we wouldn't know when the client wrote migration data. - */ + uint32_t bytes_written = write_end - write_start; - return 0; -} + if (write_end <= server_data->bar1_size) { + // case 1: entire write lies within bar1 + // TODO check the following is always allowed -static size_t -nr_pages(size_t size) -{ - return (size / sysconf(_SC_PAGE_SIZE) + - (size % sysconf(_SC_PAGE_SIZE) > 1)); -} + memcpy(server_data->bar1 + write_start, buf, bytes_written); + } else if ( + write_start < server_data->bar1_size // starts in bar1 + && write_end > server_data->bar1_size // ends in bar0 + ) { + // case 2: part of the write in bar1 and part of the write in bar0 + // TODO check the following is always allowed -static size_t -page_align(size_t size) -{ - return nr_pages(size) * sysconf(_SC_PAGE_SIZE); + uint32_t length_in_bar1 = server_data->bar1_size - write_start; + uint32_t length_in_bar0 = write_end - server_data->bar1_size; + assert(length_in_bar1 + length_in_bar0 == bytes_written); + + memcpy(server_data->bar1 + write_start, buf, length_in_bar1); + memcpy(&server_data->bar0, buf + length_in_bar1, length_in_bar0); + } else if (write_start >= server_data->bar1_size) { + // case 3: entire write lies within bar0 + // TODO check the following is always allowed + + write_start -= server_data->bar1_size; + write_end -= server_data->bar1_size; + + memcpy(&server_data->bar0 + write_start, buf, bytes_written); + } + + server_data->migration.pending_write -= bytes_written; + + return bytes_written; } int main(int argc, char *argv[]) @@ -425,10 +421,10 @@ int main(int argc, char *argv[]) char template[] = "/tmp/libvfio-user.XXXXXX"; int ret; bool verbose = false; + bool destination = false; int opt; struct sigaction act = {.sa_handler = _sa_handler}; const size_t bar1_size = 0x3000; - size_t migr_regs_size, migr_data_size, migr_size; struct server_data server_data = { .migration = { .state = VFU_MIGR_STATE_RUNNING @@ -440,20 +436,23 @@ int main(int argc, char *argv[]) const vfu_migration_callbacks_t migr_callbacks = { .version = VFU_MIGR_CALLBACKS_VERS, .transition = &migration_device_state_transition, - .get_pending_bytes = &migration_get_pending_bytes, - .prepare_data = &migration_prepare_data, .read_data = &migration_read_data, - .data_written = &migration_data_written, .write_data = &migration_write_data }; - while ((opt = getopt(argc, argv, "v")) != -1) { + while ((opt = getopt(argc, argv, "vr")) != -1) { switch (opt) { case 'v': verbose = true; break; + case 'r': + destination = true; + server_data.migration.state = VFU_MIGR_STATE_RESUME; + server_data.migration.pending_write = + bar1_size + sizeof(time_t); + break; default: /* '?' */ - errx(EXIT_FAILURE, "Usage: %s [-v] ", argv[0]); + errx(EXIT_FAILURE, "Usage: %s [-v] [-r] ", argv[0]); } } @@ -502,9 +501,6 @@ int main(int argc, char *argv[]) * are mappable. The client can still mmap the 2nd page, we can't prohibit * this under Linux. If we really want to prohibit it we have to use * separate files for the same region. - * - * We choose to use a single file which contains both BAR1 and the migration - * registers. They could also be completely different files. */ if ((tmpfd = mkstemp(template)) == -1) { err(EXIT_FAILURE, "failed to create backing file"); @@ -514,16 +510,7 @@ int main(int argc, char *argv[]) server_data.bar1_size = bar1_size; - /* - * The migration registers aren't memory mappable, so in order to make the - * rest of the migration region memory mappable we must effectively reserve - * an entire page. - */ - migr_regs_size = vfu_get_migr_register_area_size(); - migr_data_size = page_align(bar1_size + sizeof(time_t)); - migr_size = migr_regs_size + migr_data_size; - - if (ftruncate(tmpfd, server_data.bar1_size + migr_size) == -1) { + if (ftruncate(tmpfd, server_data.bar1_size) == -1) { err(EXIT_FAILURE, "failed to truncate backing file"); } server_data.bar1 = mmap(NULL, server_data.bar1_size, PROT_READ | PROT_WRITE, @@ -543,29 +530,12 @@ int main(int argc, char *argv[]) err(EXIT_FAILURE, "failed to setup BAR1 region"); } - /* setup migration */ - - struct iovec migr_mmap_areas[] = { - [0] = { - .iov_base = (void *)migr_regs_size, - .iov_len = migr_data_size - }, - }; - - /* - * The migration region comes after bar1 in the backing file, so offset is - * server_data.bar1_size. - */ - ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_MIGR_REGION_IDX, migr_size, - NULL, VFU_REGION_FLAG_RW, migr_mmap_areas, - ARRAY_SIZE(migr_mmap_areas), tmpfd, - server_data.bar1_size); - if (ret < 0) { - err(EXIT_FAILURE, "failed to setup migration region"); - } - - ret = vfu_setup_device_migration_callbacks(vfu_ctx, &migr_callbacks, - migr_regs_size); + ret = vfu_setup_device_migration_callbacks( + vfu_ctx, + destination ? LIBVFIO_USER_MIG_FLAG_START_RESUMING : 0, + &migr_callbacks + ); + if (ret < 0) { err(EXIT_FAILURE, "failed to setup device migration"); } -- cgit v1.1