-rw-r--r--  lib/libmuser.c      304
-rw-r--r--  lib/libmuser_pci.c    1
-rw-r--r--  lib/muser.h          59
-rw-r--r--  samples/client.c    228
-rw-r--r--  samples/server.c     76
5 files changed, 629 insertions, 39 deletions
diff --git a/lib/libmuser.c b/lib/libmuser.c
index 2d0805f..23c1f6a 100644
--- a/lib/libmuser.c
+++ b/lib/libmuser.c
@@ -87,6 +87,13 @@ typedef struct {
int efds[0]; /* XXX must be last */
} lm_irqs_t;
+enum migration_iteration_state {
+ VFIO_USER_MIGRATION_ITERATION_STATE_INITIAL,
+ VFIO_USER_MIGRATION_ITERATION_STATE_STARTED,
+ VFIO_USER_MIGRATION_ITERATION_STATE_DATA_PREPARED,
+ VFIO_USER_MIGRATION_ITERATION_STATE_FINISHED
+};
+
struct lm_ctx {
void *pvt;
dma_controller_t *dma;
@@ -112,7 +119,16 @@ struct lm_ctx {
int client_max_fds;
- size_t migration_pgsize;
+ struct {
+ struct vfio_device_migration_info info;
+ size_t pgsize;
+ lm_migration_callbacks_t callbacks;
+ struct {
+ enum migration_iteration_state state;
+ __u64 offset;
+ __u64 size;
+ } iter;
+ } migration;
lm_irqs_t irqs; /* XXX must be last */
};
@@ -507,7 +523,7 @@ set_version(lm_ctx_t *lm_ctx, int sock)
}
ret = recv_version(sock, &client_mj, &client_mn, &msg_id, true,
- &lm_ctx->client_max_fds, &lm_ctx->migration_pgsize);
+ &lm_ctx->client_max_fds, &lm_ctx->migration.pgsize);
if (ret < 0) {
lm_log(lm_ctx, LM_DBG, "failed to receive version: %s", strerror(-ret));
goto out;
@@ -520,7 +536,7 @@ set_version(lm_ctx_t *lm_ctx, int sock)
ret = -EINVAL;
goto out;
}
- if (lm_ctx->migration_pgsize == 0) {
+ if (lm_ctx->migration.pgsize == 0) {
lm_log(lm_ctx, LM_ERR, "bad migration page size");
ret = -EINVAL;
goto out;
@@ -528,7 +544,7 @@ set_version(lm_ctx_t *lm_ctx, int sock)
/* FIXME need to check max_fds */
- lm_ctx->migration_pgsize = MIN(lm_ctx->migration_pgsize,
+ lm_ctx->migration.pgsize = MIN(lm_ctx->migration.pgsize,
sysconf(_SC_PAGESIZE));
out:
free(server_caps);
@@ -1541,6 +1557,259 @@ handle_pci_config_space_access(lm_ctx_t *lm_ctx, char *buf, size_t count,
return count;
}
+/* valid migration state transitions */
+static const __u32 migration_states[VFIO_DEVICE_STATE_MASK + 1] = {
+ [VFIO_DEVICE_STATE_STOP] = 1 << VFIO_DEVICE_STATE_STOP,
+ [VFIO_DEVICE_STATE_RUNNING] = /* running */
+ (1 << VFIO_DEVICE_STATE_STOP) |
+ (1 << VFIO_DEVICE_STATE_RUNNING) |
+ (1 << VFIO_DEVICE_STATE_SAVING) |
+ (1 << (VFIO_DEVICE_STATE_RUNNING | VFIO_DEVICE_STATE_SAVING)) |
+ (1 << VFIO_DEVICE_STATE_RESUMING),
+ [VFIO_DEVICE_STATE_SAVING] = /* stop-and-copy */
+ (1 << VFIO_DEVICE_STATE_STOP) |
+ (1 << VFIO_DEVICE_STATE_SAVING),
+ [VFIO_DEVICE_STATE_RUNNING | VFIO_DEVICE_STATE_SAVING] = /* pre-copy */
+ (1 << VFIO_DEVICE_STATE_SAVING) |
+ (1 << (VFIO_DEVICE_STATE_RUNNING | VFIO_DEVICE_STATE_SAVING)),
+ [VFIO_DEVICE_STATE_RESUMING] = /* resuming */
+ (1 << VFIO_DEVICE_STATE_RUNNING) |
+ (1 << VFIO_DEVICE_STATE_RESUMING)
+};
+
+static bool
+_migration_state_transition_is_valid(__u32 from, __u32 to)
+{
+ return migration_states[from] & (1 << to);
+}
+
+static ssize_t
+handle_migration_device_state(lm_ctx_t *lm_ctx, __u32 *device_state,
+ bool is_write)
+{
+ int ret;
+
+ assert(lm_ctx != NULL);
+ assert(device_state != NULL);
+
+ if (!is_write) {
+ *device_state = lm_ctx->migration.info.device_state;
+ return 0;
+ }
+
+ if (*device_state & ~VFIO_DEVICE_STATE_MASK) {
+ return -EINVAL;
+ }
+
+ if (!_migration_state_transition_is_valid(lm_ctx->migration.info.device_state,
+ *device_state)) {
+ return -EINVAL;
+ }
+
+ switch (*device_state) {
+ case VFIO_DEVICE_STATE_STOP:
+ ret = lm_ctx->migration.callbacks.transition(lm_ctx->pvt,
+ LM_MIGR_STATE_STOP);
+ break;
+ case VFIO_DEVICE_STATE_RUNNING:
+ ret = lm_ctx->migration.callbacks.transition(lm_ctx->pvt,
+ LM_MIGR_STATE_START);
+ break;
+ case VFIO_DEVICE_STATE_SAVING:
+ /*
+ * FIXME How should the device operate during the stop-and-copy
+ * phase? Should we only allow the migration data to be read from
+ * the migration region, i.e. should access to any other region
+ * fail? This might be a good question to send to LKML.
+ */
+ ret = lm_ctx->migration.callbacks.transition(lm_ctx->pvt,
+ LM_MIGR_STATE_STOP_AND_COPY);
+ break;
+ case VFIO_DEVICE_STATE_RUNNING | VFIO_DEVICE_STATE_SAVING:
+ ret = lm_ctx->migration.callbacks.transition(lm_ctx->pvt,
+ LM_MIGR_STATE_PRE_COPY);
+ break;
+ case VFIO_DEVICE_STATE_RESUMING:
+ ret = lm_ctx->migration.callbacks.transition(lm_ctx->pvt,
+ LM_MIGR_STATE_RESUME);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ if (ret == 0) {
+ lm_ctx->migration.info.device_state = *device_state;
+ }
+
+ return ret;
+}
+
+static ssize_t
+handle_migration_pending_bytes(lm_ctx_t *lm_ctx, __u64 *pending_bytes,
+ bool is_write)
+{
+ assert(lm_ctx != NULL);
+ assert(pending_bytes != NULL);
+
+ if (is_write) {
+ return -EINVAL;
+ }
+
+ if (lm_ctx->migration.iter.state == VFIO_USER_MIGRATION_ITERATION_STATE_FINISHED) {
+ *pending_bytes = 0;
+ return 0;
+ }
+
+ *pending_bytes = lm_ctx->migration.callbacks.get_pending_bytes(lm_ctx->pvt);
+
+ switch (lm_ctx->migration.iter.state) {
+ case VFIO_USER_MIGRATION_ITERATION_STATE_INITIAL:
+ case VFIO_USER_MIGRATION_ITERATION_STATE_DATA_PREPARED:
+ /*
+ * FIXME what happens if data haven't been consumed in the previous
+ * iteration? Ask on LKML.
+ */
+ if (*pending_bytes == 0) {
+ lm_ctx->migration.iter.state = VFIO_USER_MIGRATION_ITERATION_STATE_FINISHED;
+ } else {
+ lm_ctx->migration.iter.state = VFIO_USER_MIGRATION_ITERATION_STATE_STARTED;
+ }
+ break;
+ case VFIO_USER_MIGRATION_ITERATION_STATE_STARTED:
+ /*
+ * Repeated reads of pending_bytes should not have any side effects.
+ * FIXME does it have to be the same as the previous value? Can it
+ * increase or even decrease? I suppose it can't be lower than
+ * data_size? Ask on LKML.
+ */
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static ssize_t
+handle_migration_data_offset(lm_ctx_t *lm_ctx, __u64 *offset, bool is_write)
+{
+ int ret;
+
+ assert(lm_ctx != NULL);
+ assert(offset != NULL);
+
+ if (is_write) {
+ return -EINVAL;
+ }
+
+ switch (lm_ctx->migration.iter.state) {
+ case VFIO_USER_MIGRATION_ITERATION_STATE_STARTED:
+ break;
+ default:
+ /*
+ * FIXME it's not clear whether these registers can be accessed in
+ * other parts of the iteration; we need clarification on the
+ * following:
+ *
+ * Read on data_offset and data_size should return the offset and
+ * size of the current buffer if the user application reads
+ * data_offset and data_size more than once here.
+ */
+ return -EINVAL;
+ }
+
+ ret = lm_ctx->migration.callbacks.prepare_data(lm_ctx->pvt,
+ &lm_ctx->migration.iter.offset,
+ &lm_ctx->migration.iter.size);
+ if (ret < 0) {
+ return ret;
+ }
+
+ *offset = lm_ctx->migration.iter.offset + sizeof(struct vfio_device_migration_info);
+
+ return ret;
+}
+
+static ssize_t
+handle_migration_data_size(lm_ctx_t *lm_ctx, __u64 *size, bool is_write)
+{
+ assert(lm_ctx != NULL);
+ assert(size != NULL);
+
+ if (is_write) {
+ return -EINVAL;
+ }
+
+ switch (lm_ctx->migration.iter.state) {
+ case VFIO_USER_MIGRATION_ITERATION_STATE_STARTED:
+ break;
+ default:
+ /* FIXME see comment in handle_migration_data_offset */
+ return -EINVAL;
+ }
+
+ *size = lm_ctx->migration.iter.size;
+
+ return 0;
+}
+
+static ssize_t
+handle_migration_region_access(lm_ctx_t *lm_ctx, char *buf, size_t count,
+ loff_t pos, bool is_write)
+{
+ int ret;
+
+ assert(lm_ctx != NULL);
+ assert(buf != NULL);
+
+ if (pos + count > lm_ctx->pci_info.reg_info[LM_DEV_MIGRATION_REG_IDX].size) {
+ lm_log(lm_ctx, LM_ERR, "read %#x-%#x past end of migration region",
+ pos, pos + count - 1);
+ return -EINVAL;
+ }
+ switch (pos) {
+ case offsetof(struct vfio_device_migration_info, device_state):
+ if (count != sizeof(lm_ctx->migration.info.device_state)) {
+ return -EINVAL;
+ }
+ ret = handle_migration_device_state(lm_ctx, (__u32*)buf,
+ is_write);
+ break;
+ case offsetof(struct vfio_device_migration_info, pending_bytes):
+ if (count != sizeof(lm_ctx->migration.info.pending_bytes)) {
+ return -EINVAL;
+ }
+ ret = handle_migration_pending_bytes(lm_ctx, (__u64*)buf, is_write);
+ break;
+ case offsetof(struct vfio_device_migration_info, data_offset):
+ if (count != sizeof(lm_ctx->migration.info.data_offset)) {
+ return -EINVAL;
+ }
+ ret = handle_migration_data_offset(lm_ctx, (__u64*)buf, is_write);
+ break;
+ case offsetof(struct vfio_device_migration_info, data_size):
+ if (count != sizeof(lm_ctx->migration.info.data_size)) {
+ return -EINVAL;
+ }
+ ret = handle_migration_data_size(lm_ctx, (__u64*)buf, is_write);
+ break;
+ default:
+ if (is_write) {
+ /* FIXME how do we handle the offset? */
+ ret = lm_ctx->migration.callbacks.write_data(lm_ctx->pvt,
+ buf, count);
+ } else {
+ ret = lm_ctx->migration.callbacks.read_data(lm_ctx->pvt,
+ buf, count,
+ pos - sizeof(struct vfio_device_migration_info));
+ }
+ }
+
+ if (ret == 0) {
+ ret = count;
+ }
+ return ret;
+}
+
static ssize_t
do_access(lm_ctx_t *lm_ctx, char *buf, size_t count, loff_t pos, bool is_write)
{
@@ -1569,6 +1838,11 @@ do_access(lm_ctx_t *lm_ctx, char *buf, size_t count, loff_t pos, bool is_write)
is_write);
}
+ if (idx == LM_DEV_MIGRATION_REG_IDX) {
+ return handle_migration_region_access(lm_ctx, buf, count, offset,
+ is_write);
+ }
+
/*
* Checking whether a callback exists might sound expensive however this
* code is not performance critical. This works well when we don't expect a
@@ -2098,7 +2372,7 @@ handle_dirty_pages_get(lm_ctx_t *lm_ctx, struct vfio_user_header *hdr,
goto out;
}
*nr_iovecs = 1 + size / sizeof(struct vfio_iommu_type1_dirty_bitmap_get);
- *iovecs = malloc(*nr_iovecs);
+ *iovecs = malloc(*nr_iovecs * sizeof(struct iovec));
if (*iovecs == NULL) {
ret = -errno;
goto out;
@@ -2154,7 +2428,7 @@ handle_dirty_pages(lm_ctx_t *lm_ctx, struct vfio_user_header *hdr,
if (dirty_bitmap.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_START) {
ret = dma_controller_dirty_page_logging_start(lm_ctx->dma,
- lm_ctx->migration_pgsize);
+ lm_ctx->migration.pgsize);
} else if (dirty_bitmap.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP) {
ret = dma_controller_dirty_page_logging_stop(lm_ctx->dma);
} else if (dirty_bitmap.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP) {
@@ -2188,9 +2462,15 @@ process_request(lm_ctx_t *lm_ctx)
assert(lm_ctx != NULL);
+ if (lm_ctx->pci_info.reg_info[LM_DEV_MIGRATION_REG_IDX].size > 0 &&
+ lm_ctx->migration.info.device_state == VFIO_DEVICE_STATE_STOP) {
+ return -ESHUTDOWN;
+ }
+
nr_fds = lm_ctx->client_max_fds;
fds = alloca(nr_fds * sizeof(int));
+ /* FIXME get request shouldn't set errno, it should return it as -errno */
ret = transports_ops[lm_ctx->trans].get_request(lm_ctx, &hdr, fds, &nr_fds);
if (unlikely(ret < 0)) {
if (ret == -EAGAIN || ret == -EWOULDBLOCK) {
@@ -2581,6 +2861,18 @@ pci_config_setup(lm_ctx_t *lm_ctx, const lm_dev_info_t *dev_info)
if (migr_reg->size < sizeof(struct vfio_device_migration_info)) {
return -EINVAL;
}
+
+ /* FIXME this should be done in lm_ctx_run or poll */
+ lm_ctx->migration.info.device_state = VFIO_DEVICE_STATE_RUNNING;
+
+ lm_ctx->migration.callbacks = dev_info->migration_callbacks;
+ if (lm_ctx->migration.callbacks.transition == NULL ||
+ lm_ctx->migration.callbacks.get_pending_bytes == NULL ||
+ lm_ctx->migration.callbacks.prepare_data == NULL ||
+ lm_ctx->migration.callbacks.read_data == NULL ||
+ lm_ctx->migration.callbacks.write_data == NULL) {
+ return -EINVAL;
+ }
}
return 0;
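
A minimal standalone check of the transition table added above, assuming the
v1 VFIO device-state values from <linux/vfio.h> (STOP=0, RUNNING=1, SAVING=2,
RESUMING=4); the names and helper below are local stand-ins for illustration,
not libmuser symbols:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

enum { STOP = 0, RUNNING = 1, SAVING = 2, RESUMING = 4, STATE_MASK = 7 };

/* each entry is a bitmap of the states reachable from the index state */
static const uint32_t allowed[STATE_MASK + 1] = {
    [STOP]             = 1 << STOP,
    [RUNNING]          = (1 << STOP) | (1 << RUNNING) | (1 << SAVING) |
                         (1 << (RUNNING | SAVING)) | (1 << RESUMING),
    [SAVING]           = (1 << STOP) | (1 << SAVING),
    [RUNNING | SAVING] = (1 << SAVING) | (1 << (RUNNING | SAVING)),
    [RESUMING]         = (1 << RUNNING) | (1 << RESUMING),
};

static bool
transition_is_valid(uint32_t from, uint32_t to)
{
    return allowed[from] & (1 << to);
}

int main(void)
{
    assert(transition_is_valid(RUNNING, RUNNING | SAVING)); /* enter pre-copy */
    assert(transition_is_valid(RUNNING | SAVING, SAVING));  /* pre-copy to stop-and-copy */
    assert(transition_is_valid(SAVING, STOP));              /* stop-and-copy to stopped */
    assert(!transition_is_valid(STOP, RUNNING));            /* stopped is terminal here */
    return 0;
}
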
diff --git a/lib/libmuser_pci.c b/lib/libmuser_pci.c
index 78d65fa..711acc4 100644
--- a/lib/libmuser_pci.c
+++ b/lib/libmuser_pci.c
@@ -314,6 +314,7 @@ muser_is_pci_hdr_access(loff_t pos)
return pos - off >= 0 && pos - off < PCI_STD_HEADER_SIZEOF;
}
+/* FIXME this function is misleading, remove it */
int
muser_pci_hdr_access(lm_ctx_t *lm_ctx, size_t *count,
loff_t *pos, bool is_write,
diff --git a/lib/muser.h b/lib/muser.h
index 375be0e..949680c 100644
--- a/lib/muser.h
+++ b/lib/muser.h
@@ -139,6 +139,8 @@ typedef struct {
/*
* Callback function that is called when the region is read or written.
+ * Note that the memory of the region is owned by the user, except for the
+ * standard header (first 64 bytes) of the PCI configuration space.
*/
lm_region_access_t *fn;
@@ -260,6 +262,62 @@ typedef enum {
#define LM_MAX_CAPS (PCI_CFG_SPACE_SIZE - PCI_STD_HEADER_SIZEOF) / PCI_CAP_SIZEOF
+/*
+ * FIXME the names of migration callback functions are probably far too long,
+ * but for now they help with the implementation.
+ */
+typedef int (lm_migration_callback_t)(void *pvt);
+
+typedef enum {
+ LM_MIGR_STATE_STOP,
+ LM_MIGR_STATE_START,
+ LM_MIGR_STATE_STOP_AND_COPY,
+ LM_MIGR_STATE_PRE_COPY,
+ LM_MIGR_STATE_RESUME
+} lm_migr_state_t;
+
+typedef struct {
+
+ /* migration state transition callback */
+ /* TODO rename to lm_migration_state_transition_callback */
+ /* FIXME maybe we should create a single callback and pass the state? */
+ int (*transition)(void *pvt, lm_migr_state_t state);
+
+ /* Callbacks for saving device state */
+
+ /*
+ * Function that is called to retrieve pending migration data. If migration
+ * data were previously made available (function prepare_data has been
+ * called) then calling this function signifies that they have been read
+ * (i.e. the migration data can be discarded). If the function returns 0 then
+ * migration has finished and this function won't be called again.
+ */
+ __u64 (*get_pending_bytes)(void *pvt);
+
+ /*
+ * Function that is called to instruct the device to prepare migration data.
+ * The function must return only after migration data are available at the
+ * specified offset.
+ */
+ int (*prepare_data)(void *pvt, __u64 *offset, __u64 *size);
+
+ /*
+ * Function that is called to read migration data. offset and size can
+ * be any subrange of the offset and size previously returned by
+ * prepare_data. The function must return the amount of data read. This
+ * function can be called even if the migration data can be memory mapped.
+ *
+ * FIXME does this mean that reading data_offset/data_size updates the values?
+ */
+ size_t (*read_data)(void *pvt, void *buf, __u64 count, __u64 offset);
+
+ /* Callback for restoring device state */
+
+ /* Function that is called for writing previously stored device state. */
+ size_t (*write_data)(void *pvt, void *data, __u64 size);
+
+} lm_migration_callbacks_t;
+
/**
* Device information structure, used to create the lm_ctx.
* To be filled and passed to lm_ctx_create()
@@ -326,6 +384,7 @@ typedef struct {
int nr_caps;
lm_cap_t **caps;
+ lm_migration_callbacks_t migration_callbacks;
} lm_dev_info_t;
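
A rough sketch of the order in which the library drives the saving callbacks
declared above while a client polls the migration registers; it assumes
muser.h is on the include path, and save_device_state() is an illustrative
helper, not part of the API:

#include <stddef.h>
#include "muser.h" /* lm_migration_callbacks_t, __u64 */

/* collect the device state that a sequence of client reads would observe */
static size_t
save_device_state(const lm_migration_callbacks_t *cbs, void *pvt,
                  char *out, size_t out_len)
{
    size_t total = 0;
    __u64 pending;

    /* a client read of pending_bytes maps to get_pending_bytes() */
    while ((pending = cbs->get_pending_bytes(pvt)) > 0 && total < out_len) {
        __u64 offset = 0, size = 0;

        /* a client read of data_offset maps to prepare_data() */
        if (cbs->prepare_data(pvt, &offset, &size) < 0 || size == 0) {
            break;
        }
        if (size > out_len - total) {
            size = out_len - total;
        }
        /* a client read of the data area maps to read_data() */
        total += cbs->read_data(pvt, out + total, size, offset);
    }
    return total;
}
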
diff --git a/samples/client.c b/samples/client.c
index 5ff79cd..0b42267 100644
--- a/samples/client.c
+++ b/samples/client.c
@@ -328,51 +328,93 @@ configure_irqs(int sock)
}
static int
-access_bar0(int sock)
+access_region(int sock, int region, bool is_write, uint64_t offset,
+ void *data, size_t data_len)
{
- struct {
- struct vfio_user_region_access region_access;
- time_t t;
- } __attribute__((packed)) data = {
- .region_access = {
- .region = LM_DEV_BAR0_REG_IDX,
- .count = sizeof(data.t)
+ struct vfio_user_region_access send_region_access = {
+ .offset = offset,
+ .region = region,
+ .count = data_len
+ };
+ struct iovec send_iovecs[3] = {
+ [1] = {
+ .iov_base = &send_region_access,
+ .iov_len = sizeof send_region_access
},
- .t = time(NULL)
+ [2] = {
+ .iov_base = data,
+ .iov_len = data_len
+ }
};
- uint16_t msg_id = 1;
- const int sleep_time = 1;
- struct vfio_user_region_access region_access = {};
- int ret = send_recv_vfio_user_msg(sock, msg_id, VFIO_USER_REGION_WRITE,
- &data, sizeof data, NULL, 0, NULL,
- &region_access, sizeof region_access);
- if (ret < 0) {
- fprintf(stderr, "failed to write to BAR0: %s\n", strerror(-ret));
+ struct {
+ struct vfio_user_region_access region_access;
+ char data[data_len];
+ } __attribute__((packed)) recv_data;
+ int op, ret;
+ size_t nr_send_iovecs, recv_data_len;
+
+ if (is_write) {
+ op = VFIO_USER_REGION_WRITE;
+ nr_send_iovecs = 3;
+ recv_data_len = sizeof(recv_data.region_access);
+ } else {
+ op = VFIO_USER_REGION_READ;
+ nr_send_iovecs = 2;
+ recv_data_len = sizeof(recv_data);
+ }
+
+ ret = _send_recv_vfio_user_msg(sock, 0, op,
+ send_iovecs, nr_send_iovecs,
+ NULL, 0, NULL,
+ &recv_data, recv_data_len);
+ if (ret != 0) {
+ fprintf(stderr, "failed to %s region %d %#lx-%#lx: %s\n",
+ is_write ? "write to" : "read from", region, offset,
+ offset + data_len - 1, strerror(-ret));
return ret;
}
- if (region_access.count != sizeof data.t) {
- fprintf(stderr, "bad written data length %d\n", region_access.count);
+ if (recv_data.region_access.count != data_len) {
+ fprintf(stderr, "bad %s data count, expected=%d, actual=%d\n",
+ is_write ? "write" : "read", data_len,
+ recv_data.region_access.count);
return -EINVAL;
}
- printf("wrote to BAR0: %ld\n", data.t);
+ /*
+ * TODO we could avoid the memcpy if _send_recv_vfio_user_msg received the
+ * response into an iovec, but it's some work to implement it.
+ */
+ if (!is_write) {
+ memcpy(data, recv_data.data, data_len);
+ }
+ return 0;
+}
- msg_id++;
+static int
+access_bar0(int sock)
+{
+ time_t t = time(NULL);
+ const int sleep_time = 1;
+ int ret = access_region(sock, LM_DEV_BAR0_REG_IDX, true, 0, &t, sizeof t);
+
+ if (ret < 0) {
+ fprintf(stderr, "failed to write to BAR0: %s\n", strerror(-ret));
+ return ret;
+ }
+
+ printf("wrote to BAR0: %ld\n", t);
sleep(sleep_time);
- ret = send_recv_vfio_user_msg(sock, msg_id, VFIO_USER_REGION_READ,
- &data.region_access, sizeof data.region_access,
- NULL, 0, NULL, &data, sizeof data);
+ ret = access_region(sock, LM_DEV_BAR0_REG_IDX, false, 0, &t, sizeof t);
if (ret < 0) {
fprintf(stderr, "failed to read from BAR0: %s\n", strerror(-ret));
return ret;
}
- assert(data.region_access.count == sizeof data.t);
- printf("read from BAR0: %ld\n", data.t);
+ printf("read from BAR0: %ld\n", t);
- assert(data.t >= sleep_time);
+ assert(t >= sleep_time);
return 0;
}
@@ -550,6 +592,105 @@ get_dirty_bitmaps(int sock, struct vfio_user_dma_region *dma_regions,
return 0;
}
+enum migration {
+ NO_MIGRATION,
+ MIGRATION_SOURCE,
+ MIGRATION_DESTINATION,
+};
+
+static void
+usage(char *path) {
+ fprintf(stderr, "Usage: %s [-h] [-m src|dst] /path/to/socket\n",
+ basename(path));
+}
+
+static int
+migrate_from(int sock)
+{
+ __u32 device_state = VFIO_DEVICE_STATE_SAVING;
+ __u64 pending_bytes, data_offset, data_size;
+ void *data;
+
+ /* XXX set device state to stop-and-copy */
+ int ret = access_region(sock, LM_DEV_MIGRATION_REG_IDX, true,
+ offsetof(struct vfio_device_migration_info, device_state),
+ &device_state, sizeof(device_state));
+ if (ret < 0) {
+ fprintf(stderr, "failed to write to device state: %s\n",
+ strerror(-ret));
+ return ret;
+ }
+
+ /* XXX read pending_bytes */
+ ret = access_region(sock, LM_DEV_MIGRATION_REG_IDX, false,
+ offsetof(struct vfio_device_migration_info, pending_bytes),
+ &pending_bytes, sizeof pending_bytes);
+ if (ret < 0) {
+ fprintf(stderr, "failed to read pending_bytes: %s\n", strerror(-ret));
+ return ret;
+ }
+
+ while (pending_bytes > 0) {
+
+ /* XXX read data_offset and data_size */
+ ret = access_region(sock, LM_DEV_MIGRATION_REG_IDX, false,
+ offsetof(struct vfio_device_migration_info, data_offset),
+ &data_offset, sizeof data_offset);
+ if (ret < 0) {
+ fprintf(stderr, "failed to read data_offset: %s\n", strerror(-ret));
+ return ret;
+ }
+
+ ret = access_region(sock, LM_DEV_MIGRATION_REG_IDX, false,
+ offsetof(struct vfio_device_migration_info, data_size),
+ &data_size, sizeof data_size);
+ if (ret < 0) {
+ fprintf(stderr, "failed to read data_size: %s\n", strerror(-ret));
+ return ret;
+ }
+
+ /* XXX read migration data */
+ data = malloc(data_size);
+ if (data == NULL) {
+ return -errno;
+ }
+ ret = access_region(sock, LM_DEV_MIGRATION_REG_IDX, false, data_offset,
+ data, data_size);
+ if (ret < 0) {
+ fprintf(stderr, "failed to read migration data: %s\n",
+ strerror(-ret));
+ free(data);
+ return ret;
+ }
+
+ /* FIXME send migration data to the destination client process */
+ printf("XXX migration: %#llx bytes worth of data\n",
+ (unsigned long long)data_size);
+ free(data);
+
+ /*
+ * XXX read pending_bytes again to indicate to the server that the
+ * migration data have been consumed.
+ */
+ ret = access_region(sock, LM_DEV_MIGRATION_REG_IDX, false,
+ offsetof(struct vfio_device_migration_info, pending_bytes),
+ &pending_bytes, sizeof pending_bytes);
+ if (ret < 0) {
+ fprintf(stderr, "failed to read pending_bytes: %s\n", strerror(-ret));
+ return ret;
+ }
+ }
+
+ /* XXX write the device state to stop; migration must have finished by now */
+ device_state = VFIO_DEVICE_STATE_STOP;
+ ret = access_region(sock, LM_DEV_MIGRATION_REG_IDX, true,
+ offsetof(struct vfio_device_migration_info, device_state),
+ &device_state, sizeof(device_state));
+ if (ret < 0) {
+ fprintf(stderr, "failed to write to device state: %s\n",
+ strerror(-ret));
+ return ret;
+ }
+
+ return 0;
+}
+
int main(int argc, char *argv[])
{
int ret, sock;
@@ -565,13 +706,36 @@ int main(int argc, char *argv[])
size_t pgsize;
int nr_dma_regions;
struct vfio_iommu_type1_dirty_bitmap dirty_bitmap = {0};
+ int opt;
+ enum migration migration = NO_MIGRATION;
+
+ while ((opt = getopt(argc, argv, "hm:")) != -1) {
+ switch (opt) {
+ case 'h':
+ usage(argv[0]);
+ exit(EXIT_SUCCESS);
+ case 'm':
+ if (strcmp(optarg, "src") == 0) {
+ migration = MIGRATION_SOURCE;
+ } else if (strcmp(optarg, "dst") == 0) {
+ migration = MIGRATION_DESTINATION;
+ } else {
+ fprintf(stderr, "invalid migration argument %s\n", optarg);
+ exit(EXIT_FAILURE);
+ }
+ break;
+ default:
+ usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+ }
- if (argc != 2) {
- fprintf(stderr, "usage: %s /path/to/socket\n", argv[0]);
+ if (argc != optind + 1) {
+ usage(argv[0]);
exit(EXIT_FAILURE);
}
- if ((sock = init_sock(argv[1])) < 0) {
+ if ((sock = init_sock(argv[optind])) < 0) {
return sock;
}
@@ -727,6 +891,10 @@ int main(int argc, char *argv[])
return ret;
}
+ if (migration == MIGRATION_SOURCE) {
+ ret = migrate_from(sock);
+ }
+
return 0;
}
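
For reference, a hypothetical migrate_to() counterpart for the
MIGRATION_DESTINATION case, following the v1 VFIO resume sequence (enter the
resuming state, write each chunk at data_offset, publish its length via
data_size, then return to running). It reuses access_region() from above;
feed_data() is an assumed source of saved chunks, and the server in this
patch cannot service it yet since data_size writes are rejected and
write_data() is a stub:

static int
migrate_to(int sock, void *(*feed_data)(__u64 *size))
{
    __u32 device_state = VFIO_DEVICE_STATE_RESUMING;
    __u64 data_offset, data_size;
    void *data;
    int ret;

    /* enter the resuming state */
    ret = access_region(sock, LM_DEV_MIGRATION_REG_IDX, true,
                        offsetof(struct vfio_device_migration_info, device_state),
                        &device_state, sizeof device_state);
    if (ret < 0) {
        return ret;
    }

    while ((data = feed_data(&data_size)) != NULL) {
        /* ask the device where to stage the next chunk */
        ret = access_region(sock, LM_DEV_MIGRATION_REG_IDX, false,
                            offsetof(struct vfio_device_migration_info, data_offset),
                            &data_offset, sizeof data_offset);
        if (ret < 0) {
            return ret;
        }
        /* write the chunk into the data area */
        ret = access_region(sock, LM_DEV_MIGRATION_REG_IDX, true, data_offset,
                            data, data_size);
        if (ret < 0) {
            return ret;
        }
        /* writing data_size tells the device the chunk is complete */
        ret = access_region(sock, LM_DEV_MIGRATION_REG_IDX, true,
                            offsetof(struct vfio_device_migration_info, data_size),
                            &data_size, sizeof data_size);
        if (ret < 0) {
            return ret;
        }
    }

    /* resume normal operation */
    device_state = VFIO_DEVICE_STATE_RUNNING;
    return access_region(sock, LM_DEV_MIGRATION_REG_IDX, true,
                         offsetof(struct vfio_device_migration_info, device_state),
                         &device_state, sizeof device_state);
}
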
diff --git a/samples/server.c b/samples/server.c
index 9f4d3b1..4611fb0 100644
--- a/samples/server.c
+++ b/samples/server.c
@@ -40,6 +40,7 @@
#include <assert.h>
#include <openssl/md5.h>
#include <sys/mman.h>
+#include <sys/param.h>
#include "../lib/muser.h"
@@ -54,6 +55,11 @@ struct server_data {
time_t bar0;
uint8_t *bar1;
struct dma_regions regions[NR_DMA_REGIONS];
+ struct {
+ int fake_internal_state;
+ __u64 pending_bytes;
+ __u64 data_size;
+ } migration;
};
static void
@@ -205,8 +211,65 @@ static int device_reset(void *pvt)
printf("device reset callback\n");
}
-int main(int argc, char *argv[])
+static int
+migration_device_state_transition(void *pvt, lm_migr_state_t state)
{
+ struct server_data *server_data = pvt;
+
+ printf("migration: transition to device state %d\n", state);
+
+ switch (state) {
+ case LM_MIGR_STATE_STOP_AND_COPY:
+ /* TODO must be less than size of data region in migration region */
+ server_data->migration.pending_bytes = sysconf(_SC_PAGESIZE);
+ break;
+ case LM_MIGR_STATE_STOP:
+ assert(server_data->migration.pending_bytes == 0);
+ break;
+ default:
+ assert(false); /* FIXME */
+ }
+ return 0;
+}
+
+static __u64
+migration_get_pending_bytes(void *pvt)
+{
+ struct server_data *server_data = pvt;
+ if (server_data->migration.data_size > 0) {
+ assert(server_data->migration.data_size <= server_data->migration.pending_bytes);
+ server_data->migration.pending_bytes -= server_data->migration.data_size;
+ }
+ return server_data->migration.pending_bytes;
+}
+
+static int
+migration_prepare_data(void *pvt, __u64 *offset, __u64 *size)
+{
+ struct server_data *server_data = pvt;
+
+ *offset = 0;
+ *size = server_data->migration.data_size = MIN(server_data->migration.pending_bytes, sysconf(_SC_PAGESIZE) / 4);
+ return 0;
+}
+
+static size_t
+migration_read_data(void *pvt, void *buf, __u64 size, __u64 offset)
+{
+ struct server_data *server_data = pvt;
+
+ assert(server_data->migration.data_size >= size);
+
+ /* the sample device has no real state to copy into buf */
+ return size;
+}
+
+static size_t
+migration_write_data(void *pvt, void *data, __u64 size)
+{
+ assert(false); /* FIXME resuming is not implemented in this sample */
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
int ret;
bool trans_sock = false, verbose = false;
char opt;
@@ -269,7 +332,7 @@ int main(int argc, char *argv[])
},
.reg_info[LM_DEV_MIGRATION_REG_IDX] = { /* migration region */
.flags = LM_REG_FLAG_RW,
- .size = sysconf(_SC_PAGESIZE),
+ .size = sizeof(struct vfio_device_migration_info) + sysconf(_SC_PAGESIZE),
.mmap_areas = sparse_areas,
},
.irq_count[LM_DEV_INTX_IRQ_IDX] = 1,
@@ -278,7 +341,14 @@ int main(int argc, char *argv[])
.reset = device_reset,
.map_dma = map_dma,
.unmap_dma = unmap_dma,
- .pvt = &server_data
+ .pvt = &server_data,
+ .migration_callbacks = {
+ .transition = &migration_device_state_transition,
+ .get_pending_bytes = &migration_get_pending_bytes,
+ .prepare_data = &migration_prepare_data,
+ .read_data = &migration_read_data,
+ .write_data = &migration_write_data
+ }
};
sigemptyset(&act.sa_mask);