aboutsummaryrefslogtreecommitdiff
path: root/samples/client.c
diff options
context:
space:
mode:
authorThanos Makatos <thanos.makatos@nutanix.com>2021-01-25 17:39:35 +0000
committerGitHub <noreply@github.com>2021-01-25 17:39:35 +0000
commitf2fe9d8e4fc4c5eb80dc61cc243a18087cc12ca3 (patch)
tree06c6a12d428fbfadbcccf0dea780c393f076f6ca /samples/client.c
parent915a5b01f34a61cacef5ae5b50a215d13628b8b0 (diff)
downloadlibvfio-user-f2fe9d8e4fc4c5eb80dc61cc243a18087cc12ca3.zip
libvfio-user-f2fe9d8e4fc4c5eb80dc61cc243a18087cc12ca3.tar.gz
libvfio-user-f2fe9d8e4fc4c5eb80dc61cc243a18087cc12ca3.tar.bz2
add pre-copy phase in live migration example (#247)
This patch adds a simplistic pre-copy phase in the live migration sample. The end goal is have a separate thread in the client to modify device state while the device is in the pre-copy phase. This will be done in later patches. BAR1 is saved during the pre-copy phase and BAR0 is saved during the stop-and-copy phase. This is purely for convenience. There are quite a few assumptions and FIXMEs, even in the client code. We plan to address them in future patches. Signed-off-by: Thanos Makatos <thanos.makatos@nutanix.com>
Diffstat (limited to 'samples/client.c')
-rw-r--r--samples/client.c203
1 files changed, 144 insertions, 59 deletions
diff --git a/samples/client.c b/samples/client.c
index 18558b8..e7228ac 100644
--- a/samples/client.c
+++ b/samples/client.c
@@ -717,20 +717,29 @@ usage(char *path) {
basename(path));
}
-static void
-migrate_from(int sock, int migr_reg_index, void **data, __u64 *len)
+/*
+ * Normally each time the source client (QEMU) would read migration data from
+ * the device it would send them to the destination client. However, since in
+ * our sample both the source and the destination client are the same process,
+ * we simply accumulate the migration data of each iteration and apply it to
+ * the destination server at the end.
+ *
+ * Performs as many migration loops as @nr_iters or until the device has no
+ * more migration data (pending_bytes is zero), which ever comes first. The
+ * result of each migration iteration is stored in @migr_iter. @migr_iter must
+ * be at least @nr_iters.
+ *
+ * @returns the number of iterations performed
+ */
+static size_t
+do_migrate(int sock, int migr_reg_index, size_t nr_iters,
+ struct iovec *migr_iter)
{
- __u32 device_state = VFIO_DEVICE_STATE_SAVING;
+ int ret;
__u64 pending_bytes, data_offset, data_size;
+ size_t i = 0;
- /* XXX set device state to stop-and-copy */
- int ret = access_region(sock, migr_reg_index, true,
- offsetof(struct vfio_device_migration_info, device_state),
- &device_state, sizeof(device_state));
- if (ret < 0) {
- errx(EXIT_FAILURE, "failed to write to device state: %s",
- strerror(-ret));
- }
+ assert(nr_iters > 0);
/* XXX read pending_bytes */
ret = access_region(sock, migr_reg_index, false,
@@ -744,19 +753,7 @@ migrate_from(int sock, int migr_reg_index, void **data, __u64 *len)
/* We do expect some migration data. */
assert(pending_bytes > 0);
- /*
- * The only expectation about pending_bytes is whether it's zero or
- * non-zero, therefore it must be considered volatile, even acrosss
- * iterantions. In the sample server we know it's static so it's fairly
- * straightforward.
- */
- *len = pending_bytes;
- *data = malloc(*len);
- if (*data == NULL) {
- err(EXIT_FAILURE, "failed to allocate migration buffer");
- }
-
- while (pending_bytes > 0) {
+ for (i = 0; i < nr_iters && pending_bytes > 0; i++) {
/* XXX read data_offset and data_size */
ret = access_region(sock, migr_reg_index, false,
@@ -775,11 +772,17 @@ migrate_from(int sock, int migr_reg_index, void **data, __u64 *len)
strerror(-ret));
}
- assert(data_offset - sizeof(struct vfio_device_migration_info) + data_size <= *len);
+ assert(data_offset - sizeof(struct vfio_device_migration_info) + data_size <= pending_bytes);
+
+ migr_iter[i].iov_len = data_size;
+ migr_iter[i].iov_base = malloc(data_size);
+ if (migr_iter[i].iov_base == NULL) {
+ err(EXIT_FAILURE, "failed to allocate migration buffer");
+ }
/* XXX read migration data */
ret = access_region(sock, migr_reg_index, false, data_offset,
- (char*)*data + data_offset - sizeof(struct vfio_device_migration_info),
+ (char*)migr_iter[i].iov_base + data_offset - sizeof(struct vfio_device_migration_info),
data_size);
if (ret < 0) {
errx(EXIT_FAILURE, "failed to read migration data: %s",
@@ -800,9 +803,43 @@ migrate_from(int sock, int migr_reg_index, void **data, __u64 *len)
strerror(-ret));
}
}
+ return i;
+}
- /* XXX read device state, migration must have finished now */
- device_state = VFIO_DEVICE_STATE_STOP;
+static size_t
+migrate_from(int sock, int migr_reg_index, size_t *nr_iters, struct iovec **migr_iters)
+{
+ __u32 device_state;
+ int ret;
+ size_t _nr_iters;
+
+ /*
+ * FIXME to fully demonstrate live migration we'll need a way to change
+ * device state while the client is running the migration iteration. One
+ * way to do that is to have the client randomly modify a big-ish device
+ * region while running the live migration iterations, and at some point
+ * stopping to do the stop-and-copy phase. It can store in a buffer the
+ * modifications it makes and then after the device has been migrated it
+ * should compare the buffer with the migrated device region.
+ */
+
+ /*
+ * TODO The server generates migration data while it's in pre-copy state.
+ *
+ * FIXME the server generates 4 rounds of migration data while in pre-copy
+ * state and 1 while in stop-and-copy state. Don't assume this.
+ */
+ *nr_iters = 5;
+ *migr_iters = malloc(sizeof(struct iovec) * *nr_iters);
+ if (*migr_iters == NULL) {
+ err(EXIT_FAILURE, NULL);
+ }
+
+ /*
+ * XXX set device state to pre-copy. This is technically optional but any
+ * VMM that cares about performance needs this.
+ */
+ device_state = VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RUNNING;
ret = access_region(sock, migr_reg_index, true,
offsetof(struct vfio_device_migration_info, device_state),
&device_state, sizeof(device_state));
@@ -810,18 +847,57 @@ migrate_from(int sock, int migr_reg_index, void **data, __u64 *len)
errx(EXIT_FAILURE, "failed to write to device state: %s",
strerror(-ret));
}
+
+ _nr_iters = do_migrate(sock, migr_reg_index, 4, *migr_iters);
+
+ if (_nr_iters != 4) {
+ errx(EXIT_FAILURE,
+ "expected 4 iterations instead of %ld while in pre-copy state\n",
+ _nr_iters);
+ }
+
+ printf("setting device state to stop-and-copy\n");
+
+ device_state = VFIO_DEVICE_STATE_SAVING;
+ ret = access_region(sock, migr_reg_index, true,
+ offsetof(struct vfio_device_migration_info, device_state),
+ &device_state, sizeof(device_state));
+ if (ret < 0) {
+ errx(EXIT_FAILURE, "failed to write to device state: %s",
+ strerror(-ret));
+ }
+
+ _nr_iters += do_migrate(sock, migr_reg_index, 1, (*migr_iters) + _nr_iters);
+ if (_nr_iters != 5) {
+ errx(EXIT_FAILURE,
+ "expected 5 iterations instead of %ld while in stop-and-copy state\n",
+ _nr_iters);
+ }
+
+ /* XXX read device state, migration must have finished now */
+ device_state = VFIO_DEVICE_STATE_STOP;
+ ret = access_region(sock, migr_reg_index, true,
+ offsetof(struct vfio_device_migration_info, device_state),
+ &device_state, sizeof(device_state));
+ if (ret < 0) {
+ errx(EXIT_FAILURE, "failed to write to device state: %s",
+ strerror(-ret));
+ }
+
+ return _nr_iters;
}
static int
migrate_to(char *old_sock_path, int *server_max_fds,
- size_t *pgsize, void *migr_data, __u64 migr_data_len,
+ size_t *pgsize, size_t nr_iters, struct iovec *migr_iters,
char *path_to_server, int migr_reg_index)
{
int ret, sock;
char *sock_path;
struct stat sb;
__u32 device_state = VFIO_DEVICE_STATE_RESUMING;
- __u64 data_offset;
+ __u64 data_offset, data_len;
+ size_t i;
assert(old_sock_path != NULL);
@@ -874,33 +950,42 @@ migrate_to(char *old_sock_path, int *server_max_fds,
strerror(-ret));
}
- /* XXX read data offset */
- ret = access_region(sock, migr_reg_index, false,
- offsetof(struct vfio_device_migration_info, data_offset),
- &data_offset, sizeof(data_offset));
- if (ret < 0) {
- errx(EXIT_FAILURE, "failed to read data offset: %s", strerror(-ret));
- }
+ for (i = 0; i < nr_iters; i++) {
- /* XXX write migration data */
+ /* XXX read data offset */
+ ret = access_region(sock, migr_reg_index, false,
+ offsetof(struct vfio_device_migration_info, data_offset),
+ &data_offset, sizeof(data_offset));
+ if (ret < 0) {
+ errx(EXIT_FAILURE, "failed to read migration data offset: %s",
+ strerror(-ret));
+ }
- /*
- * TODO write half of migration data via regular write and other half via
- * memopy map.
- */
- ret = access_region(sock, migr_reg_index, true,
- data_offset, migr_data, migr_data_len);
- if (ret < 0) {
- errx(EXIT_FAILURE, "failed to write migration data: %s",
- strerror(-ret));
- }
+ /* XXX write migration data */
- /* XXX write data_size */
- ret = access_region(sock, migr_reg_index, true,
- offsetof(struct vfio_device_migration_info, data_size),
- &migr_data_len, sizeof migr_data_len);
- if (ret < 0) {
- errx(EXIT_FAILURE, "failed to write data size: %s", strerror(-ret));
+ /*
+ * TODO write half of migration data via regular write and other half via
+ * memopy map.
+ */
+ printf("writing migration device data %#llx-%#llx\n", data_offset,
+ data_offset + migr_iters[i].iov_len - 1);
+ ret = access_region(sock, migr_reg_index, true,
+ data_offset, migr_iters[i].iov_base,
+ migr_iters[i].iov_len);
+ if (ret < 0) {
+ errx(EXIT_FAILURE, "failed to write device migration data: %s",
+ strerror(-ret));
+ }
+
+ /* XXX write data_size */
+ data_len = migr_iters[i].iov_len;
+ ret = access_region(sock, migr_reg_index, true,
+ offsetof(struct vfio_device_migration_info, data_size),
+ &data_len, sizeof data_len);
+ if (ret < 0) {
+ errx(EXIT_FAILURE, "failed to write migration data size: %s",
+ strerror(-ret));
+ }
}
/* XXX set device state to running */
@@ -953,11 +1038,11 @@ int main(int argc, char *argv[])
struct vfio_iommu_type1_dirty_bitmap dirty_bitmap = {0};
int opt;
time_t t;
- void *migr_data;
- __u64 migr_data_len;
char *path_to_server = NULL;
vfu_pci_hdr_t config_space;
int migr_reg_index;
+ struct iovec *migr_iters;
+ size_t nr_iters;
while ((opt = getopt(argc, argv, "h")) != -1) {
switch (opt) {
@@ -1117,13 +1202,13 @@ int main(int argc, char *argv[])
/*
* By sleeping here for 1s after migration finishes on the source server
- * (but not yet started on the destination server), the timer should be be
+ * (but not yet started on the destination server), the timer should be
* armed on the destination server for 2-1=1 seconds. If we don't sleep
* then it will be armed for 2 seconds, which isn't as interesting.
*/
sleep(1);
- migrate_from(sock, migr_reg_index, &migr_data, &migr_data_len);
+ migrate_from(sock, migr_reg_index, &nr_iters, &migr_iters);
/*
* Normally the client would now send the device state to the destination
@@ -1136,7 +1221,7 @@ int main(int argc, char *argv[])
}
sock = migrate_to(argv[optind], &server_max_fds, &pgsize,
- migr_data, migr_data_len, path_to_server, migr_reg_index);
+ nr_iters, migr_iters, path_to_server, migr_reg_index);
/*
* Now we must reconfigure the destination server.