aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWilliam Henderson <william.henderson@nutanix.com>2023-07-10 10:48:58 +0000
committerJohn Levon <john.levon@nutanix.com>2023-09-15 13:05:01 +0100
commit2ce5c70f9e0dc1bd159cec9cfa1f7cff7fef3596 (patch)
tree8841ea8a94694990b04e2ee1920901f8622ce786
parent5562eb7ca9971ca9ad895519e2ee834e721545d8 (diff)
downloadlibvfio-user-2ce5c70f9e0dc1bd159cec9cfa1f7cff7fef3596.zip
libvfio-user-2ce5c70f9e0dc1bd159cec9cfa1f7cff7fef3596.tar.gz
libvfio-user-2ce5c70f9e0dc1bd159cec9cfa1f7cff7fef3596.tar.bz2
update samples to use migration v2
Signed-off-by: William Henderson <william.henderson@nutanix.com>
-rw-r--r--  samples/client.c           | 309
-rw-r--r--  samples/gpio-pci-idio-16.c |  58
-rw-r--r--  samples/server.c           | 236
3 files changed, 280 insertions, 323 deletions
diff --git a/samples/client.c b/samples/client.c
index d4abd21..121122a 100644
--- a/samples/client.c
+++ b/samples/client.c
@@ -62,6 +62,8 @@ static char const *irq_to_str[] = {
[VFU_DEV_REQ_IRQ] = "REQ"
};
+static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+
void
vfu_log(UNUSED vfu_ctx_t *vfu_ctx, UNUSED int level,
const char *fmt, ...)
@@ -217,9 +219,7 @@ static bool
get_region_vfio_caps(struct vfio_info_cap_header *header,
struct vfio_region_info_cap_sparse_mmap **sparse)
{
- struct vfio_region_info_cap_type *type;
unsigned int i;
- bool migr = false;
while (true) {
switch (header->id) {
@@ -234,16 +234,6 @@ get_region_vfio_caps(struct vfio_info_cap_header *header,
(ull_t)(*sparse)->areas[i].size);
}
break;
- case VFIO_REGION_INFO_CAP_TYPE:
- type = (struct vfio_region_info_cap_type*)header;
- if (type->type != VFIO_REGION_TYPE_MIGRATION ||
- type->subtype != VFIO_REGION_SUBTYPE_MIGRATION) {
- errx(EXIT_FAILURE, "bad region type %d/%d", type->type,
- type->subtype);
- }
- migr = true;
- printf("client: migration region\n");
- break;
default:
errx(EXIT_FAILURE, "bad VFIO cap ID %#x", header->id);
}
@@ -252,7 +242,7 @@ get_region_vfio_caps(struct vfio_info_cap_header *header,
}
header = (struct vfio_info_cap_header*)((char*)header + header->next - sizeof(struct vfio_region_info));
}
- return migr;
+ return false;
}
static void
@@ -347,8 +337,7 @@ get_device_region_info(int sock, uint32_t index)
if (get_region_vfio_caps((struct vfio_info_cap_header*)(region_info + 1),
&sparse)) {
if (sparse != NULL) {
- assert((index == VFU_PCI_DEV_BAR1_REGION_IDX && nr_fds == 2) ||
- (index == VFU_PCI_DEV_MIGR_REGION_IDX && nr_fds == 1));
+ assert((index == VFU_PCI_DEV_BAR1_REGION_IDX && nr_fds == 2));
assert(nr_fds == sparse->nr_areas);
mmap_sparse_areas(fds, region_info, sparse);
}
@@ -386,7 +375,7 @@ get_device_info(int sock, struct vfio_user_device_info *dev_info)
err(EXIT_FAILURE, "failed to get device info");
}
- if (dev_info->num_regions != 10) {
+ if (dev_info->num_regions != 9) {
errx(EXIT_FAILURE, "bad number of device regions %d",
dev_info->num_regions);
}
@@ -471,7 +460,6 @@ access_region(int sock, int region, bool is_write, uint64_t offset,
.iov_len = data_len
}
};
- static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
struct vfio_user_region_access *recv_data;
size_t nr_send_iovecs, recv_data_len;
int op, ret;
@@ -526,6 +514,114 @@ access_region(int sock, int region, bool is_write, uint64_t offset,
return 0;
}
+/*
+ * Move the device to migration state @state by sending a
+ * VFIO_USER_DEVICE_FEATURE message carrying the
+ * VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE feature.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+set_migration_state(int sock, uint32_t state)
+{
+    static int msg_id = 0xfab1;
+    struct vfio_user_device_feature req = {
+        .argsz = 16, /* sizeof(req) + sizeof(change_state) */
+        .flags = VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE
+    };
+    struct vfio_user_device_feature_mig_state change_state = {
+        .device_state = state,
+        .data_fd = 0
+    };
+    struct iovec send_iovecs[3] = {
+        /* [0] is reserved for the message header filled in by the transport */
+        [1] = {
+            .iov_base = &req,
+            .iov_len = sizeof(req)
+        },
+        [2] = {
+            .iov_base = &change_state,
+            .iov_len = sizeof(change_state)
+        }
+    };
+    void *response = malloc(sizeof(req) + sizeof(change_state));
+
+    /* The old code neither checked this allocation nor ever freed it. */
+    if (response == NULL) {
+        return -1;
+    }
+
+    /* Serialise socket access against the other migration helpers. */
+    pthread_mutex_lock(&mutex);
+    int ret = tran_sock_msg_iovec(sock, msg_id--, VFIO_USER_DEVICE_FEATURE,
+                                  send_iovecs, 3, NULL, 0, NULL,
+                                  response, sizeof(req) + sizeof(change_state),
+                                  NULL, 0);
+    pthread_mutex_unlock(&mutex);
+
+    if (ret < 0) {
+        free(response);
+        return -1;
+    }
+
+    /* On success the server echoes the request back unchanged. */
+    assert(memcmp(&req, response, sizeof(req)) == 0);
+    assert(memcmp(&change_state, response + sizeof(req),
+                  sizeof(change_state)) == 0);
+
+    free(response);
+
+    return ret;
+}
+
+/*
+ * Read up to @len bytes of migration data from the server into @buf
+ * with a VFIO_USER_MIG_DATA_READ message.
+ *
+ * Returns the number of bytes actually read (may be less than @len when
+ * the device has no more data), or -1 on failure.
+ */
+static int
+read_migr_data(int sock, void *buf, size_t len)
+{
+    static int msg_id = 0x6904;
+    struct vfio_user_mig_data_without_data req = {
+        .argsz = 12, /* header only; a read request carries no payload */
+        .size = len
+    };
+    struct iovec send_iovecs[2] = {
+        /* [0] is reserved for the message header filled in by the transport */
+        [1] = {
+            .iov_base = &req,
+            .iov_len = sizeof(req)
+        }
+    };
+    struct vfio_user_mig_data_with_data *res = calloc(1, sizeof(req) + len);
+
+    if (res == NULL) {
+        return -1;
+    }
+
+    pthread_mutex_lock(&mutex);
+    int ret = tran_sock_msg_iovec(sock, msg_id--, VFIO_USER_MIG_DATA_READ,
+                                  send_iovecs, 2, NULL, 0, NULL,
+                                  res, sizeof(req) + len, NULL, 0);
+    pthread_mutex_unlock(&mutex);
+
+    if (ret < 0) {
+        free(res);
+        return -1;
+    }
+
+    /*
+     * Copy the size out before releasing @res: the old code returned
+     * res->size after free(res), a use-after-free.
+     */
+    int size = res->size;
+
+    memcpy(buf, res->data, size);
+
+    free(res);
+
+    return size;
+}
+
+/*
+ * Send @len bytes of migration data from @buf to the server with a
+ * VFIO_USER_MIG_DATA_WRITE message.  The server's reply header is
+ * received back into @req.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+write_migr_data(int sock, void *buf, size_t len)
+{
+    static int msg_id = 0x2023;
+    struct vfio_user_mig_data_with_data req = {
+        .argsz = 12 + len, /* fixed header plus the trailing data bytes */
+        .size = len
+    };
+    struct iovec send_iovecs[3] = {
+        /* [0] is reserved for the message header filled in by the transport */
+        [1] = {
+            .iov_base = &req,
+            .iov_len = sizeof(req)
+        },
+        [2] = {
+            .iov_base = buf,
+            .iov_len = len
+        }
+    };
+
+    /* Serialise socket access against the other migration helpers. */
+    pthread_mutex_lock(&mutex);
+    int ret = tran_sock_msg_iovec(sock, msg_id--, VFIO_USER_MIG_DATA_WRITE,
+                                  send_iovecs, 3, NULL, 0, NULL,
+                                  &req, sizeof(req), NULL, 0);
+    pthread_mutex_unlock(&mutex);
+
+    if (ret < 0) {
+        return -1;
+    }
+
+    return ret;
+}
+
static void
access_bar0(int sock, time_t *t)
{
@@ -735,79 +831,37 @@ usage(char *argv0)
basename(argv0));
}
-/*
- * Normally each time the source client (QEMU) would read migration data from
- * the device it would send them to the destination client. However, since in
- * our sample both the source and the destination client are the same process,
- * we simply accumulate the migration data of each iteration and apply it to
- * the destination server at the end.
- *
- * Performs as many migration loops as @nr_iters or until the device has no
- * more migration data (pending_bytes is zero), which ever comes first. The
- * result of each migration iteration is stored in @migr_iter. @migr_iter must
- * be at least @nr_iters.
- *
- * @returns the number of iterations performed
- */
static size_t
-do_migrate(int sock, size_t nr_iters, struct iovec *migr_iter)
+do_migrate(int sock, size_t max_iters, size_t max_iter_size,
+ struct iovec *migr_iter)
{
int ret;
- uint64_t pending_bytes, data_offset, data_size;
- size_t i = 0;
-
- assert(nr_iters > 0);
-
- /* XXX read pending_bytes */
- ret = access_region(sock, VFU_PCI_DEV_MIGR_REGION_IDX, false,
- offsetof(struct vfio_user_migration_info, pending_bytes),
- &pending_bytes, sizeof(pending_bytes));
- if (ret < 0) {
- err(EXIT_FAILURE, "failed to read pending_bytes");
- }
-
- for (i = 0; i < nr_iters && pending_bytes > 0; i++) {
+ size_t i;
+ bool is_more = true;
- /* XXX read data_offset and data_size */
- ret = access_region(sock, VFU_PCI_DEV_MIGR_REGION_IDX, false,
- offsetof(struct vfio_user_migration_info, data_offset),
- &data_offset, sizeof(data_offset));
- if (ret < 0) {
- err(EXIT_FAILURE, "failed to read data_offset");
- }
+ for (i = 0; i < max_iters && is_more; i++) {
- ret = access_region(sock, VFU_PCI_DEV_MIGR_REGION_IDX, false,
- offsetof(struct vfio_user_migration_info, data_size),
- &data_size, sizeof(data_size));
- if (ret < 0) {
- err(EXIT_FAILURE, "failed to read data_size");
- }
+ migr_iter[i].iov_len = max_iter_size;
+ migr_iter[i].iov_base = malloc(migr_iter[i].iov_len);
- migr_iter[i].iov_len = data_size;
- migr_iter[i].iov_base = malloc(data_size);
if (migr_iter[i].iov_base == NULL) {
err(EXIT_FAILURE, "failed to allocate migration buffer");
}
/* XXX read migration data */
- ret = access_region(sock, VFU_PCI_DEV_MIGR_REGION_IDX, false,
- data_offset,
- (char *)migr_iter[i].iov_base, data_size);
+ ret = read_migr_data(sock, migr_iter[i].iov_base, migr_iter[i].iov_len);
if (ret < 0) {
err(EXIT_FAILURE, "failed to read migration data");
}
- /* FIXME send migration data to the destination client process */
-
- /*
- * XXX read pending_bytes again to indicate to the server that the
- * migration data have been consumed.
- */
- ret = access_region(sock, VFU_PCI_DEV_MIGR_REGION_IDX, false,
- offsetof(struct vfio_user_migration_info, pending_bytes),
- &pending_bytes, sizeof(pending_bytes));
- if (ret < 0) {
- err(EXIT_FAILURE, "failed to read pending_bytes");
+ if (ret < (int)migr_iter[i].iov_len) {
+ // FIXME is it pointless shuffling stuff around?
+ void* buf = malloc(ret);
+ memcpy(buf, migr_iter[i].iov_base, ret);
+ free(migr_iter[i].iov_base);
+ migr_iter[i].iov_base = buf;
+ migr_iter[i].iov_len = ret;
+ is_more = false;
}
}
return i;
@@ -844,7 +898,7 @@ fake_guest(void *arg)
if (ret != 0) {
err(EXIT_FAILURE, "fake guest failed to write garbage to BAR1");
}
- crc = rte_hash_crc(buf, fake_guest_data->bar1_size, crc);
+ crc = rte_hash_crc(buf, fake_guest_data->bar1_size, 0);
__sync_synchronize();
} while (!fake_guest_data->done);
@@ -859,7 +913,6 @@ migrate_from(int sock, size_t *nr_iters, struct iovec **migr_iters,
{
uint32_t device_state;
int ret;
- size_t _nr_iters;
pthread_t thread;
struct fake_guest_data fake_guest_data = {
.sock = sock,
@@ -868,13 +921,15 @@ migrate_from(int sock, size_t *nr_iters, struct iovec **migr_iters,
.crcp = crcp
};
+ size_t max_iter_size = 4096;
+
ret = pthread_create(&thread, NULL, fake_guest, &fake_guest_data);
if (ret != 0) {
errno = ret;
err(EXIT_FAILURE, "failed to create pthread");
}
- *nr_iters = 2;
+ *nr_iters = 8;
*migr_iters = malloc(sizeof(struct iovec) * *nr_iters);
if (*migr_iters == NULL) {
err(EXIT_FAILURE, NULL);
@@ -884,16 +939,17 @@ migrate_from(int sock, size_t *nr_iters, struct iovec **migr_iters,
* XXX set device state to pre-copy. This is technically optional but any
* VMM that cares about performance needs this.
*/
- device_state = VFIO_DEVICE_STATE_V1_SAVING | VFIO_DEVICE_STATE_V1_RUNNING;
- ret = access_region(sock, VFU_PCI_DEV_MIGR_REGION_IDX, true,
- offsetof(struct vfio_user_migration_info, device_state),
- &device_state, sizeof(device_state));
+ device_state = VFIO_DEVICE_STATE_PRE_COPY;
+ ret = set_migration_state(sock, device_state);
if (ret < 0) {
err(EXIT_FAILURE, "failed to write to device state");
}
- _nr_iters = do_migrate(sock, 1, *migr_iters);
- assert(_nr_iters == 1);
+ ret = do_migrate(sock, *nr_iters, max_iter_size, *migr_iters);
+ if (ret < 0) {
+ err(EXIT_FAILURE, "failed to do migration in pre-copy state");
+ }
+
printf("client: stopping fake guest thread\n");
fake_guest_data.done = true;
__sync_synchronize();
@@ -905,31 +961,25 @@ migrate_from(int sock, size_t *nr_iters, struct iovec **migr_iters,
printf("client: setting device state to stop-and-copy\n");
- device_state = VFIO_DEVICE_STATE_V1_SAVING;
- ret = access_region(sock, VFU_PCI_DEV_MIGR_REGION_IDX, true,
- offsetof(struct vfio_user_migration_info, device_state),
- &device_state, sizeof(device_state));
+ device_state = VFIO_DEVICE_STATE_STOP_COPY;
+ ret = set_migration_state(sock, device_state);
if (ret < 0) {
err(EXIT_FAILURE, "failed to write to device state");
}
- _nr_iters += do_migrate(sock, 1, (*migr_iters) + _nr_iters);
- if (_nr_iters != 2) {
- errx(EXIT_FAILURE,
- "expected 2 iterations instead of %zu while in stop-and-copy state",
- _nr_iters);
+ size_t iters = do_migrate(sock, *nr_iters, max_iter_size, *migr_iters);
+ if (ret < 0) {
+ err(EXIT_FAILURE, "failed to do migration in stop-and-copy state");
}
/* XXX read device state, migration must have finished now */
- device_state = VFIO_DEVICE_STATE_V1_STOP;
- ret = access_region(sock, VFU_PCI_DEV_MIGR_REGION_IDX, true,
- offsetof(struct vfio_user_migration_info, device_state),
- &device_state, sizeof(device_state));
+ device_state = VFIO_DEVICE_STATE_STOP;
+ ret = set_migration_state(sock, device_state);
if (ret < 0) {
err(EXIT_FAILURE, "failed to write to device state");
}
- return _nr_iters;
+ return iters;
}
static int
@@ -941,8 +991,7 @@ migrate_to(char *old_sock_path, int *server_max_fds,
int ret, sock;
char *sock_path;
struct stat sb;
- uint32_t device_state = VFIO_DEVICE_STATE_V1_RESUMING;
- uint64_t data_offset, data_len;
+ uint32_t device_state = VFIO_DEVICE_STATE_RESUMING;
size_t i;
uint32_t dst_crc;
char buf[bar1_size];
@@ -960,9 +1009,10 @@ migrate_to(char *old_sock_path, int *server_max_fds,
if (ret == -1) {
err(EXIT_FAILURE, "failed to fork");
}
- if (ret > 0) { /* child (destination server) */
+ if (ret == 0) { /* child (destination server) */
char *_argv[] = {
path_to_server,
+ (char *)"-r", // start in VFIO_DEVICE_STATE_RESUMING
(char *)"-v",
sock_path,
NULL
@@ -992,57 +1042,23 @@ migrate_to(char *old_sock_path, int *server_max_fds,
negotiate(sock, server_max_fds, server_max_data_xfer_size, pgsize);
- /* XXX set device state to resuming */
- ret = access_region(sock, VFU_PCI_DEV_MIGR_REGION_IDX, true,
- offsetof(struct vfio_user_migration_info, device_state),
- &device_state, sizeof(device_state));
- if (ret < 0) {
- err(EXIT_FAILURE, "failed to set device state to resuming");
- }
-
for (i = 0; i < nr_iters; i++) {
- /* XXX read data offset */
- ret = access_region(sock, VFU_PCI_DEV_MIGR_REGION_IDX, false,
- offsetof(struct vfio_user_migration_info, data_offset),
- &data_offset, sizeof(data_offset));
- if (ret < 0) {
- err(EXIT_FAILURE, "failed to read migration data offset");
- }
-
/* XXX write migration data */
- /*
- * TODO write half of migration data via regular write and other half via
- * memopy map.
- */
- printf("client: writing migration device data %#llx-%#llx\n",
- (ull_t)data_offset,
- (ull_t)(data_offset + migr_iters[i].iov_len - 1));
- ret = access_region(sock, VFU_PCI_DEV_MIGR_REGION_IDX, true,
- data_offset, migr_iters[i].iov_base,
- migr_iters[i].iov_len);
+ printf("client: writing migration device data iter %zu\n", i);
+ ret = write_migr_data(sock, migr_iters[i].iov_base,
+ migr_iters[i].iov_len);
if (ret < 0) {
err(EXIT_FAILURE, "failed to write device migration data");
}
-
- /* XXX write data_size */
- data_len = migr_iters[i].iov_len;
- ret = access_region(sock, VFU_PCI_DEV_MIGR_REGION_IDX, true,
- offsetof(struct vfio_user_migration_info, data_size),
- &data_len, sizeof(data_len));
- if (ret < 0) {
- err(EXIT_FAILURE, "failed to write migration data size");
- }
}
- /* XXX set device state to running */
- device_state = VFIO_DEVICE_STATE_V1_RUNNING;
- ret = access_region(sock, VFU_PCI_DEV_MIGR_REGION_IDX, true,
- offsetof(struct vfio_user_migration_info, device_state),
- &device_state, sizeof(device_state));
+ /* XXX set device state to stop to finish the transfer */
+ device_state = VFIO_DEVICE_STATE_STOP;
+ ret = set_migration_state(sock, device_state);
if (ret < 0) {
- err(EXIT_FAILURE, "failed to set device state to running");
+ err(EXIT_FAILURE, "failed to set device state to stop");
}
/* validate contents of BAR1 */
@@ -1056,6 +1072,15 @@ migrate_to(char *old_sock_path, int *server_max_fds,
if (dst_crc != src_crc) {
fprintf(stderr, "client: CRC mismatch: %u != %u\n", src_crc, dst_crc);
abort();
+ } else {
+ fprintf(stdout, "client: CRC match, we did it! :)\n");
+ }
+
+ /* XXX set device state to running */
+ device_state = VFIO_DEVICE_STATE_RUNNING;
+ ret = set_migration_state(sock, device_state);
+ if (ret < 0) {
+ err(EXIT_FAILURE, "failed to set device state to running");
}
return sock;
diff --git a/samples/gpio-pci-idio-16.c b/samples/gpio-pci-idio-16.c
index b50f407..b323249 100644
--- a/samples/gpio-pci-idio-16.c
+++ b/samples/gpio-pci-idio-16.c
@@ -77,49 +77,23 @@ migration_device_state_transition(vfu_ctx_t *vfu_ctx, vfu_migr_state_t state)
return 0;
}
-static uint64_t
-migration_get_pending_bytes(UNUSED vfu_ctx_t *vfu_ctx)
-{
- if (dirty) {
- return sizeof(pin);
- }
- return 0;
-}
-
-static int
-migration_prepare_data(UNUSED vfu_ctx_t *vfu_ctx,
- uint64_t *offset, uint64_t *size)
-{
- *offset = 0;
- if (size != NULL) { /* null means resuming */
- *size = sizeof(pin);
- }
- return 0;
-}
-
static ssize_t
-migration_read_data(UNUSED vfu_ctx_t *vfu_ctx, void *buf,
- uint64_t size, uint64_t offset)
+migration_read_data(UNUSED vfu_ctx_t *vfu_ctx, void *buf, uint64_t size)
{
- assert(offset == 0);
assert(size == sizeof(pin));
- memcpy(buf, &pin, sizeof(pin));
- dirty = false;
- return 0;
-}
-static int
-migration_data_written(UNUSED vfu_ctx_t *vfu_ctx, uint64_t count)
-{
- assert(count == sizeof(pin));
- return 0;
+ if (dirty) {
+ memcpy(buf, &pin, sizeof(pin));
+ dirty = false;
+ return sizeof(pin);
+ } else {
+ return 0;
+ }
}
static ssize_t
-migration_write_data(UNUSED vfu_ctx_t *vfu_ctx, void *buf,
- uint64_t size, uint64_t offset)
+migration_write_data(UNUSED vfu_ctx_t *vfu_ctx, void *buf, uint64_t size)
{
- assert(offset == 0);
assert(size == sizeof(pin));
memcpy(&pin, buf, sizeof(pin));
return 0;
@@ -145,16 +119,10 @@ main(int argc, char *argv[])
int opt;
struct sigaction act = { .sa_handler = _sa_handler };
vfu_ctx_t *vfu_ctx;
- size_t migr_regs_size = vfu_get_migr_register_area_size();
- size_t migr_data_size = sysconf(_SC_PAGE_SIZE);
- size_t migr_size = migr_regs_size + migr_data_size;
const vfu_migration_callbacks_t migr_callbacks = {
.version = VFU_MIGR_CALLBACKS_VERS,
.transition = &migration_device_state_transition,
- .get_pending_bytes = &migration_get_pending_bytes,
- .prepare_data = &migration_prepare_data,
.read_data = &migration_read_data,
- .data_written = &migration_data_written,
.write_data = &migration_write_data
};
@@ -214,13 +182,7 @@ main(int argc, char *argv[])
}
if (enable_migr) {
- ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_MIGR_REGION_IDX, migr_size,
- NULL, VFU_REGION_FLAG_RW, NULL, 0, -1, 0);
- if (ret < 0) {
- err(EXIT_FAILURE, "failed to setup migration region");
- }
- ret = vfu_setup_device_migration_callbacks(vfu_ctx, &migr_callbacks,
- migr_regs_size);
+ ret = vfu_setup_device_migration_callbacks(vfu_ctx, 0, &migr_callbacks);
if (ret < 0) {
err(EXIT_FAILURE, "failed to setup device migration");
}
diff --git a/samples/server.c b/samples/server.c
index 37c5d78..2b93771 100644
--- a/samples/server.c
+++ b/samples/server.c
@@ -62,7 +62,8 @@ struct server_data {
size_t bar1_size;
struct dma_regions regions[NR_DMA_REGIONS];
struct {
- uint64_t pending_bytes;
+ uint64_t pending_read;
+ uint64_t pending_write;
vfu_migr_state_t state;
} migration;
};
@@ -134,7 +135,7 @@ bar1_access(vfu_ctx_t *vfu_ctx, char * const buf,
if (is_write) {
if (server_data->migration.state == VFU_MIGR_STATE_PRE_COPY) {
/* dirty the whole thing */
- server_data->migration.pending_bytes = server_data->bar1_size;
+ server_data->migration.pending_read = server_data->bar1_size;
}
memcpy(server_data->bar1 + offset, buf, count);
} else {
@@ -274,19 +275,20 @@ migration_device_state_transition(vfu_ctx_t *vfu_ctx, vfu_migr_state_t state)
if (setitimer(ITIMER_REAL, &new, NULL) != 0) {
err(EXIT_FAILURE, "failed to disable timer");
}
- server_data->migration.pending_bytes = server_data->bar1_size + sizeof(time_t); /* FIXME BAR0 region size */
+ server_data->migration.pending_read = server_data->bar1_size + sizeof(time_t); /* FIXME BAR0 region size */
break;
case VFU_MIGR_STATE_PRE_COPY:
- /* TODO must be less than size of data region in migration region */
- server_data->migration.pending_bytes = server_data->bar1_size;
+ server_data->migration.pending_read = server_data->bar1_size;
break;
case VFU_MIGR_STATE_STOP:
/* FIXME should gracefully fail */
- assert(server_data->migration.pending_bytes == 0);
+ assert(server_data->migration.pending_read == 0);
break;
case VFU_MIGR_STATE_RESUME:
+ server_data->migration.pending_write = server_data->bar1_size + sizeof(time_t);
break;
case VFU_MIGR_STATE_RUNNING:
+ assert(server_data->migration.pending_write == 0);
ret = arm_timer(vfu_ctx, server_data->bar0);
if (ret < 0) {
return ret;
@@ -299,125 +301,119 @@ migration_device_state_transition(vfu_ctx_t *vfu_ctx, vfu_migr_state_t state)
return 0;
}
-static uint64_t
-migration_get_pending_bytes(vfu_ctx_t *vfu_ctx)
-{
- struct server_data *server_data = vfu_get_private(vfu_ctx);
- return server_data->migration.pending_bytes;
-}
-
-static int
-migration_prepare_data(vfu_ctx_t *vfu_ctx, uint64_t *offset, uint64_t *size)
-{
- struct server_data *server_data = vfu_get_private(vfu_ctx);
-
- *offset = 0;
- if (size != NULL) {
- *size = server_data->migration.pending_bytes;
- }
- return 0;
-}
-
static ssize_t
-migration_read_data(vfu_ctx_t *vfu_ctx, void *buf,
- uint64_t size, uint64_t offset)
+migration_read_data(vfu_ctx_t *vfu_ctx, void *buf, uint64_t size)
{
struct server_data *server_data = vfu_get_private(vfu_ctx);
- if (server_data->migration.state != VFU_MIGR_STATE_PRE_COPY &&
- server_data->migration.state != VFU_MIGR_STATE_STOP_AND_COPY)
- {
- return size;
- }
-
/*
- * For ease of implementation we expect the client to read all migration
- * data in one go; partial reads are not supported. This is allowed by VFIO
- * however we don't yet support it. Similarly, when resuming, partial
- * writes are supported by VFIO, however we don't in this sample.
- *
* If in pre-copy state we copy BAR1, if in stop-and-copy state we copy
* both BAR1 and BAR0. Since we always copy BAR1 in the stop-and-copy state,
* copying BAR1 in the pre-copy state is pointless. Fixing this requires
* more complex state tracking which exceeds the scope of this sample.
*/
- if (offset != 0 || size != server_data->migration.pending_bytes) {
- errno = EINVAL;
- return -1;
+ if (server_data->migration.pending_read == 0 || size == 0) {
+ return 0;
}
- memcpy(buf, server_data->bar1, server_data->bar1_size);
+ uint32_t total_read = server_data->bar1_size;
+
if (server_data->migration.state == VFU_MIGR_STATE_STOP_AND_COPY) {
- memcpy(buf + server_data->bar1_size, &server_data->bar0,
- sizeof(server_data->bar0));
+ total_read += sizeof(server_data->bar0);
+ }
+
+ uint32_t read_start = total_read - server_data->migration.pending_read;
+ uint32_t read_end = MIN(read_start + size, total_read); // exclusive
+ assert(read_end > read_start);
+
+ uint32_t bytes_read = read_end - read_start;
+
+ if (read_end <= server_data->bar1_size) {
+ // case 1: entire read lies within bar1
+ // TODO check the following is always allowed
+
+ memcpy(buf, server_data->bar1 + read_start, bytes_read);
+ } else if (
+ read_start < server_data->bar1_size // starts in bar1
+ && read_end > server_data->bar1_size // ends in bar0
+ ) {
+ // case 2: part of the read in bar1 and part of the read in bar0
+ // TODO check the following is always allowed
+
+ uint32_t length_in_bar1 = server_data->bar1_size - read_start;
+ uint32_t length_in_bar0 = read_end - server_data->bar1_size;
+ assert(length_in_bar1 + length_in_bar0 == bytes_read);
+
+ memcpy(buf, server_data->bar1 + read_start, length_in_bar1);
+ memcpy(buf + length_in_bar1, &server_data->bar0, length_in_bar0);
+ } else if (read_start >= server_data->bar1_size) {
+ // case 3: entire read lies within bar0
+ // TODO check the following is always allowed
+
+ read_start -= server_data->bar1_size;
+ read_end -= server_data->bar1_size;
+
+ memcpy(buf, &server_data->bar0 + read_start, bytes_read);
}
- server_data->migration.pending_bytes = 0;
- return size;
+ server_data->migration.pending_read -= bytes_read;
+
+ return bytes_read;
}
static ssize_t
-migration_write_data(vfu_ctx_t *vfu_ctx, void *data,
- uint64_t size, uint64_t offset)
+migration_write_data(vfu_ctx_t *vfu_ctx, void *data, uint64_t size)
{
struct server_data *server_data = vfu_get_private(vfu_ctx);
char *buf = data;
- int ret;
assert(server_data != NULL);
assert(data != NULL);
- if (offset != 0 || size < server_data->bar1_size) {
- vfu_log(vfu_ctx, LOG_DEBUG, "XXX bad migration data write %#llx-%#llx",
- (unsigned long long)offset,
- (unsigned long long)offset + size - 1);
- errno = EINVAL;
- return -1;
- }
-
- memcpy(server_data->bar1, buf, server_data->bar1_size);
- buf += server_data->bar1_size;
- size -= server_data->bar1_size;
- if (size == 0) {
+ if (server_data->migration.pending_write == 0 || size == 0) {
return 0;
}
- if (size != sizeof(server_data->bar0)) {
- errno = EINVAL;
- return -1;
- }
- memcpy(&server_data->bar0, buf, sizeof(server_data->bar0));
- ret = bar0_access(vfu_ctx, buf, sizeof(server_data->bar0), 0, true);
- assert(ret == (int)size); /* FIXME */
- return 0;
-}
+ uint32_t total_write = server_data->bar1_size + sizeof(server_data->bar0);
+ uint32_t write_start = total_write - server_data->migration.pending_write;
+ uint32_t write_end = MIN(write_start + size, total_write); // exclusive
+ assert(write_end > write_start);
-static int
-migration_data_written(UNUSED vfu_ctx_t *vfu_ctx, UNUSED uint64_t count)
-{
- /*
- * We apply migration state directly in the migration_write_data callback,
- * so we don't need to do anything here. We would have to apply migration
- * state in this callback if the migration region was memory mappable, in
- * which case we wouldn't know when the client wrote migration data.
- */
+ uint32_t bytes_written = write_end - write_start;
- return 0;
-}
+ if (write_end <= server_data->bar1_size) {
+ // case 1: entire write lies within bar1
+ // TODO check the following is always allowed
-static size_t
-nr_pages(size_t size)
-{
- return (size / sysconf(_SC_PAGE_SIZE) +
- (size % sysconf(_SC_PAGE_SIZE) > 1));
-}
+ memcpy(server_data->bar1 + write_start, buf, bytes_written);
+ } else if (
+ write_start < server_data->bar1_size // starts in bar1
+ && write_end > server_data->bar1_size // ends in bar0
+ ) {
+ // case 2: part of the write in bar1 and part of the write in bar0
+ // TODO check the following is always allowed
-static size_t
-page_align(size_t size)
-{
- return nr_pages(size) * sysconf(_SC_PAGE_SIZE);
+ uint32_t length_in_bar1 = server_data->bar1_size - write_start;
+ uint32_t length_in_bar0 = write_end - server_data->bar1_size;
+ assert(length_in_bar1 + length_in_bar0 == bytes_written);
+
+ memcpy(server_data->bar1 + write_start, buf, length_in_bar1);
+ memcpy(&server_data->bar0, buf + length_in_bar1, length_in_bar0);
+ } else if (write_start >= server_data->bar1_size) {
+ // case 3: entire write lies within bar0
+ // TODO check the following is always allowed
+
+ write_start -= server_data->bar1_size;
+ write_end -= server_data->bar1_size;
+
+ memcpy(&server_data->bar0 + write_start, buf, bytes_written);
+ }
+
+ server_data->migration.pending_write -= bytes_written;
+
+ return bytes_written;
}
int main(int argc, char *argv[])
@@ -425,10 +421,10 @@ int main(int argc, char *argv[])
char template[] = "/tmp/libvfio-user.XXXXXX";
int ret;
bool verbose = false;
+ bool destination = false;
int opt;
struct sigaction act = {.sa_handler = _sa_handler};
const size_t bar1_size = 0x3000;
- size_t migr_regs_size, migr_data_size, migr_size;
struct server_data server_data = {
.migration = {
.state = VFU_MIGR_STATE_RUNNING
@@ -440,20 +436,23 @@ int main(int argc, char *argv[])
const vfu_migration_callbacks_t migr_callbacks = {
.version = VFU_MIGR_CALLBACKS_VERS,
.transition = &migration_device_state_transition,
- .get_pending_bytes = &migration_get_pending_bytes,
- .prepare_data = &migration_prepare_data,
.read_data = &migration_read_data,
- .data_written = &migration_data_written,
.write_data = &migration_write_data
};
- while ((opt = getopt(argc, argv, "v")) != -1) {
+ while ((opt = getopt(argc, argv, "vr")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
+ case 'r':
+ destination = true;
+ server_data.migration.state = VFU_MIGR_STATE_RESUME;
+ server_data.migration.pending_write =
+ bar1_size + sizeof(time_t);
+ break;
default: /* '?' */
- errx(EXIT_FAILURE, "Usage: %s [-v] <socketpath>", argv[0]);
+ errx(EXIT_FAILURE, "Usage: %s [-v] [-r] <socketpath>", argv[0]);
}
}
@@ -502,9 +501,6 @@ int main(int argc, char *argv[])
* are mappable. The client can still mmap the 2nd page, we can't prohibit
* this under Linux. If we really want to prohibit it we have to use
* separate files for the same region.
- *
- * We choose to use a single file which contains both BAR1 and the migration
- * registers. They could also be completely different files.
*/
if ((tmpfd = mkstemp(template)) == -1) {
err(EXIT_FAILURE, "failed to create backing file");
@@ -514,16 +510,7 @@ int main(int argc, char *argv[])
server_data.bar1_size = bar1_size;
- /*
- * The migration registers aren't memory mappable, so in order to make the
- * rest of the migration region memory mappable we must effectively reserve
- * an entire page.
- */
- migr_regs_size = vfu_get_migr_register_area_size();
- migr_data_size = page_align(bar1_size + sizeof(time_t));
- migr_size = migr_regs_size + migr_data_size;
-
- if (ftruncate(tmpfd, server_data.bar1_size + migr_size) == -1) {
+ if (ftruncate(tmpfd, server_data.bar1_size) == -1) {
err(EXIT_FAILURE, "failed to truncate backing file");
}
server_data.bar1 = mmap(NULL, server_data.bar1_size, PROT_READ | PROT_WRITE,
@@ -543,29 +530,12 @@ int main(int argc, char *argv[])
err(EXIT_FAILURE, "failed to setup BAR1 region");
}
- /* setup migration */
-
- struct iovec migr_mmap_areas[] = {
- [0] = {
- .iov_base = (void *)migr_regs_size,
- .iov_len = migr_data_size
- },
- };
-
- /*
- * The migration region comes after bar1 in the backing file, so offset is
- * server_data.bar1_size.
- */
- ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_MIGR_REGION_IDX, migr_size,
- NULL, VFU_REGION_FLAG_RW, migr_mmap_areas,
- ARRAY_SIZE(migr_mmap_areas), tmpfd,
- server_data.bar1_size);
- if (ret < 0) {
- err(EXIT_FAILURE, "failed to setup migration region");
- }
-
- ret = vfu_setup_device_migration_callbacks(vfu_ctx, &migr_callbacks,
- migr_regs_size);
+ ret = vfu_setup_device_migration_callbacks(
+ vfu_ctx,
+ destination ? LIBVFIO_USER_MIG_FLAG_START_RESUMING : 0,
+ &migr_callbacks
+ );
+
if (ret < 0) {
err(EXIT_FAILURE, "failed to setup device migration");
}