aboutsummaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
author William Henderson <william.henderson@nutanix.com> 2023-09-15 16:07:01 +0100
committer GitHub <noreply@github.com> 2023-09-15 16:07:01 +0100
commit 190f85bf9c114bf7c981bb8908394368f84c0c04 (patch)
tree 92273a811fc3a8af74a5f62cec8871f345d6999b /include
parent 1569a37a54ecb63bd4008708c76339ccf7d06115 (diff)
downloadlibvfio-user-190f85bf9c114bf7c981bb8908394368f84c0c04.zip
libvfio-user-190f85bf9c114bf7c981bb8908394368f84c0c04.tar.gz
libvfio-user-190f85bf9c114bf7c981bb8908394368f84c0c04.tar.bz2
adapt to VFIO live migration v2 (#782)
This commit adapts the vfio-user protocol specification and the libvfio-user implementation to v2 of the VFIO live migration interface, as used in the kernel and QEMU. The differences between v1 and v2 are discussed in this email thread [1], and we slightly differ from upstream VFIO v2 in that instead of transferring data over a new FD, we use the existing UNIX socket with new commands VFIO_USER_MIG_DATA_READ/WRITE. We also don't yet use P2P states. The updated spec was submitted to qemu-devel [2]. [1] https://lore.kernel.org/all/20220130160826.32449-9-yishaih@nvidia.com/ [2] https://lore.kernel.org/all/20230718094150.110183-1-william.henderson@nutanix.com/ Signed-off-by: William Henderson <william.henderson@nutanix.com>
Diffstat (limited to 'include')
-rw-r--r-- include/libvfio-user.h 162
-rw-r--r-- include/vfio-user.h 105
2 files changed, 100 insertions, 167 deletions
diff --git a/include/libvfio-user.h b/include/libvfio-user.h
index 21cb99a..e4cfa60 100644
--- a/include/libvfio-user.h
+++ b/include/libvfio-user.h
@@ -583,21 +583,8 @@ typedef enum {
VFU_MIGR_STATE_RESUME
} vfu_migr_state_t;
-#define VFU_MIGR_CALLBACKS_VERS 1
+#define VFU_MIGR_CALLBACKS_VERS 2
-/*
- * Callbacks during the pre-copy and stop-and-copy phases.
- *
- * The client executes the following steps to copy migration data:
- *
- * 1. get_pending_bytes: device must return amount of migration data
- * 2. prepare_data: device must prepare migration data
- * 3. read_data: device must provide migration data
- *
- * The client repeats the above steps until there is no more migration data to
- * return (the device must return 0 from get_pending_bytes to indicate that
- * there are no more migration data to be consumed in this iteration).
- */
typedef struct {
/*
@@ -615,152 +602,30 @@ typedef struct {
* FIXME maybe we should create a single callback and pass the state?
*/
int (*transition)(vfu_ctx_t *vfu_ctx, vfu_migr_state_t state);
-
- /* Callbacks for saving device state */
-
- /*
- * Function that is called to retrieve the amount of pending migration
- * data. If migration data were previously made available (function
- * prepare_data has been called) then calling this function signifies that
- * they have been read (e.g. migration data can be discarded). If the
- * function returns 0 then migration has finished and this function won't
- * be called again.
- *
- * The amount of pending migration data returned by the device does not
- * necessarily have to monotonically decrease over time and does not need
- * to match the amount of migration data returned via the @size argument in
- * prepare_data. It can completely fluctuate according to the needs of the
- * device. These semantics are derived from the pending_bytes register in
- * VFIO. Therefore the value returned by get_pending_bytes must be
- * primarily regarded as boolean, either 0 or non-zero, as far as migration
- * completion is concerned. More advanced vfio-user clients can make
- * assumptions on how migration is progressing on devices that guarantee
- * that the amount of pending migration data decreases over time.
- */
- uint64_t (*get_pending_bytes)(vfu_ctx_t *vfu_ctx);
-
- /*
- * Function that is called to instruct the device to prepare migration data
- * to be read when in pre-copy or stop-and-copy state, and to prepare for
- * receiving migration data when in resuming state.
- *
- * When in pre-copy and stop-and-copy state, the function must return only
- * after migration data are available at the specified offset. This
- * callback is called once per iteration. The amount of data available
- * pointed to by @size can be different that the amount of data returned by
- * get_pending_bytes in the beginning of the iteration.
- *
- * In VFIO, the data_offset and data_size registers can be read multiple
- * times during an iteration and are invariant, libvfio-user simplifies
- * this by caching the values and returning them when read, guaranteeing
- * that prepare_data() is called only once per migration iteration.
- *
- * When in resuming state, @offset must be set to where migration data must
- * written. @size points to NULL.
- *
- * The callback should return -1 on error, setting errno.
- */
- int (*prepare_data)(vfu_ctx_t *vfu_ctx, uint64_t *offset, uint64_t *size);
-
+
/*
- * Function that is called to read migration data. offset and size can be
- * any subrange on the offset and size previously returned by prepare_data.
- * The function must return the amount of data read or -1 on error, setting
- * errno.
+ * Function that is called to read `count` bytes of migration data into
+ * `buf`. The function must return the amount of data read or -1 on error,
+ * setting errno. The function may return less data than requested.
*
- * This function can be called even if the migration data can be memory
- * mapped.
+ * If the function returns zero, this is interpreted to mean that there is
+ * no more migration data to read.
*/
- ssize_t (*read_data)(vfu_ctx_t *vfu_ctx, void *buf,
- uint64_t count, uint64_t offset);
-
- /* Callbacks for restoring device state */
+ ssize_t (*read_data)(vfu_ctx_t *vfu_ctx, void *buf, uint64_t count);
/*
- * Fuction that is called for writing previously stored device state. The
+ * Function that is called for writing previously stored device state. The
* function must return the amount of data written or -1 on error, setting
- * errno.
- */
- ssize_t (*write_data)(vfu_ctx_t *vfu_ctx, void *buf, uint64_t count,
- uint64_t offset);
-
- /*
- * Function that is called when client has written some previously stored
- * device state.
- *
- * The callback should return -1 on error, setting errno.
+ * errno. Partial writes are not supported, so any return value other than
+ * `count` is invalid.
*/
- int (*data_written)(vfu_ctx_t *vfu_ctx, uint64_t count);
+ ssize_t (*write_data)(vfu_ctx_t *vfu_ctx, void *buf, uint64_t count);
} vfu_migration_callbacks_t;
-/**
- * The definition for VFIO_DEVICE_STATE_XXX differs with the version of vfio
- * header file used. Some old systems wouldn't have these definitions. Some
- * other newer systems would be using region based migration, and not
- * have VFIO_DEVICE_STATE_V1_XXXX defined. The latest ones have
- * VFIO_DEVICE_STATE_V1_XXXX defined. The following addresses all
- * these scenarios.
- */
-#if defined(VFIO_DEVICE_STATE_STOP)
-
-_Static_assert(VFIO_DEVICE_STATE_STOP == 0,
- "incompatible VFIO_DEVICE_STATE_STOP definition");
-
-#define VFIO_DEVICE_STATE_V1_STOP VFIO_DEVICE_STATE_STOP
-#define VFIO_DEVICE_STATE_V1_RUNNING VFIO_DEVICE_STATE_RUNNING
-#define VFIO_DEVICE_STATE_V1_SAVING VFIO_DEVICE_STATE_SAVING
-#define VFIO_DEVICE_STATE_V1_RESUMING VFIO_DEVICE_STATE_RESUMING
-
-#elif !defined(VFIO_REGION_TYPE_MIGRATION_DEPRECATED) /* VFIO_DEVICE_STATE_STOP */
-
-#define VFIO_DEVICE_STATE_V1_STOP (0)
-#define VFIO_DEVICE_STATE_V1_RUNNING (1 << 0)
-#define VFIO_DEVICE_STATE_V1_SAVING (1 << 1)
-#define VFIO_DEVICE_STATE_V1_RESUMING (1 << 2)
-#define VFIO_DEVICE_STATE_MASK ((1 << 3) - 1)
-
-#endif /* VFIO_REGION_TYPE_MIGRATION_DEPRECATED */
-
-/*
- * The currently defined migration registers; if using migration callbacks,
- * these are handled internally by the library.
- *
- * This is analogous to struct vfio_device_migration_info.
- */
-struct vfio_user_migration_info {
- /* VFIO_DEVICE_STATE_* */
- uint32_t device_state;
- uint32_t reserved;
- uint64_t pending_bytes;
- uint64_t data_offset;
- uint64_t data_size;
-};
-
-/*
- * Returns the size of the area needed to hold the migration registers at the
- * beginning of the migration region; guaranteed to be page aligned.
- */
-size_t
-vfu_get_migr_register_area_size(void);
-
-/**
- * vfu_setup_device_migration provides an abstraction over the migration
- * protocol: the user specifies a set of callbacks which are called in response
- * to client accesses of the migration region; the migration region read/write
- * callbacks are not called after this function call. Offsets in callbacks are
- * relative to @data_offset.
- *
- * @vfu_ctx: the libvfio-user context
- * @callbacks: migration callbacks
- * @data_offset: offset in the migration region where data begins.
- *
- * @returns 0 on success, -1 on error, sets errno.
- */
int
vfu_setup_device_migration_callbacks(vfu_ctx_t *vfu_ctx,
- const vfu_migration_callbacks_t *callbacks,
- uint64_t data_offset);
+ const vfu_migration_callbacks_t *callbacks);
/**
* Triggers an interrupt.
@@ -906,7 +771,6 @@ enum {
VFU_PCI_DEV_ROM_REGION_IDX,
VFU_PCI_DEV_CFG_REGION_IDX,
VFU_PCI_DEV_VGA_REGION_IDX,
- VFU_PCI_DEV_MIGR_REGION_IDX,
VFU_PCI_DEV_NUM_REGIONS,
};
diff --git a/include/vfio-user.h b/include/vfio-user.h
index a749938..0b115d3 100644
--- a/include/vfio-user.h
+++ b/include/vfio-user.h
@@ -66,7 +66,10 @@ enum vfio_user_command {
VFIO_USER_DMA_READ = 11,
VFIO_USER_DMA_WRITE = 12,
VFIO_USER_DEVICE_RESET = 13,
- VFIO_USER_DIRTY_PAGES = 14,
+ VFIO_USER_REGION_WRITE_MULTI = 15,
+ VFIO_USER_DEVICE_FEATURE = 16,
+ VFIO_USER_MIG_DATA_READ = 17,
+ VFIO_USER_MIG_DATA_WRITE = 18,
VFIO_USER_MAX,
};
@@ -200,31 +203,97 @@ typedef struct vfio_user_region_io_fds_reply {
} sub_regions[];
} __attribute__((packed)) vfio_user_region_io_fds_reply_t;
+/* Analogous to struct vfio_device_feature_dma_logging_range */
+struct vfio_user_device_feature_dma_logging_range {
+ uint64_t iova;
+ uint64_t length;
+} __attribute__((packed));
-/* Analogous to vfio_iommu_type1_dirty_bitmap. */
-struct vfio_user_dirty_pages {
- uint32_t argsz;
-#ifndef VFIO_IOMMU_DIRTY_PAGES_FLAG_START
-#define VFIO_IOMMU_DIRTY_PAGES_FLAG_START (1 << 0)
-#define VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP (1 << 1)
-#define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP (1 << 2)
-#endif
- uint32_t flags;
+/* Analogous to struct vfio_device_feature_dma_logging_control */
+struct vfio_user_device_feature_dma_logging_control {
+ uint64_t page_size;
+ uint32_t num_ranges;
+ uint32_t reserved;
+ struct vfio_user_device_feature_dma_logging_range ranges[];
} __attribute__((packed));
-/* Analogous to struct vfio_iommu_type1_dirty_bitmap_get. */
-struct vfio_user_bitmap_range {
+/* Analogous to struct vfio_device_feature_dma_logging_report */
+struct vfio_user_device_feature_dma_logging_report {
uint64_t iova;
- uint64_t size;
- struct vfio_user_bitmap bitmap;
+ uint64_t length;
+ uint64_t page_size;
+ uint8_t bitmap[];
+} __attribute__((packed));
+
+#ifndef VFIO_DEVICE_FEATURE_DMA_LOGGING_START
+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_START 6
+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP 7
+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT 8
+#endif
+
+/* Analogous to struct vfio_device_feature */
+struct vfio_user_device_feature {
+ uint32_t argsz;
+ uint32_t flags;
+#ifndef VFIO_DEVICE_FEATURE_MASK
+#define VFIO_DEVICE_FEATURE_MASK (0xffff) /* 16-bit feature index */
+#define VFIO_DEVICE_FEATURE_GET (1 << 16) /* Get feature into data[] */
+#define VFIO_DEVICE_FEATURE_SET (1 << 17) /* Set feature from data[] */
+#define VFIO_DEVICE_FEATURE_PROBE (1 << 18) /* Probe feature support */
+#endif
+ uint8_t data[];
+} __attribute__((packed));
+
+/* Analogous to struct vfio_device_feature_migration */
+struct vfio_user_device_feature_migration {
+ uint64_t flags;
+#ifndef VFIO_REGION_TYPE_MIGRATION_DEPRECATED
+#define VFIO_MIGRATION_STOP_COPY (1 << 0)
+#define VFIO_MIGRATION_P2P (1 << 1)
+#endif
+/*
+ * PRE_COPY was added in a later kernel version, after
+ * VFIO_REGION_TYPE_MIGRATION_DEPRECATED had been introduced.
+ */
+#ifndef VFIO_MIGRATION_PRE_COPY
+#define VFIO_MIGRATION_PRE_COPY (1 << 2)
+#endif
} __attribute__((packed));
+#ifndef VFIO_REGION_TYPE_MIGRATION_DEPRECATED
+#define VFIO_DEVICE_FEATURE_MIGRATION 1
+#endif
+_Static_assert(sizeof(struct vfio_user_device_feature_migration) == 8,
+ "bad vfio_user_device_feature_migration size");
-#ifndef VFIO_REGION_TYPE_MIGRATION
+/* Analogous to struct vfio_device_feature_mig_state (two packed 32-bit fields, 8 bytes) */
+struct vfio_user_device_feature_mig_state {
+ uint32_t device_state;
+ uint32_t data_fd;
+} __attribute__((packed));
+#ifndef VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE
+#define VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE 2
+#endif
+_Static_assert(sizeof(struct vfio_user_device_feature_mig_state) == 8,
+ "bad vfio_user_device_feature_mig_state size");
-#define VFIO_REGION_TYPE_MIGRATION (3)
-#define VFIO_REGION_SUBTYPE_MIGRATION (1)
+/* Analogous to enum vfio_device_mig_state */
+enum vfio_user_device_mig_state {
+ VFIO_USER_DEVICE_STATE_ERROR = 0,
+ VFIO_USER_DEVICE_STATE_STOP = 1,
+ VFIO_USER_DEVICE_STATE_RUNNING = 2,
+ VFIO_USER_DEVICE_STATE_STOP_COPY = 3,
+ VFIO_USER_DEVICE_STATE_RESUMING = 4,
+ VFIO_USER_DEVICE_STATE_RUNNING_P2P = 5,
+ VFIO_USER_DEVICE_STATE_PRE_COPY = 6,
+ VFIO_USER_DEVICE_STATE_PRE_COPY_P2P = 7,
+ VFIO_USER_DEVICE_NUM_STATES = 8,
+};
-#endif /* VFIO_REGION_TYPE_MIGRATION */
+struct vfio_user_mig_data {
+ uint32_t argsz;
+ uint32_t size;
+ uint8_t data[];
+} __attribute__((packed));
#ifdef __cplusplus
}