author     Thanos Makatos <thanos.makatos@nutanix.com>   2021-10-05 13:54:26 +0100
committer  GitHub <noreply@github.com>                   2021-10-05 13:54:26 +0100
commit     8d82bd5f20fac5d8b4dab510d2294e076a6dd93d (patch)
tree       cb287e9436e387d14ca0d81a2182dd10fca8bd86
parent     e7f0fc73cdab811948adf5227c04f722a7c3105b (diff)
make migration state callback optionally asynchronous (#608)
Some devices need the migration state callback to be asynchronous. The simplest way to implement this is to have the callback return -1 with errno set to EBUSY; libvfio-user then does not process any other new messages (vfu_run_ctx returns -1 and sets errno to EBUSY) and provides a way for the user to complete the state transition (vfu_migr_done).

Signed-off-by: Thanos Makatos <thanos.makatos@nutanix.com>
Reviewed-by: John Levon <john.levon@nutanix.com>
Reviewed-by: Swapnil Ingle <swapnil.ingle@nutanix.com>
-rw-r--r--  include/libvfio-user.h     |  22
-rw-r--r--  lib/libvfio-user.c         |  76
-rw-r--r--  lib/migration.c            |   5
-rw-r--r--  lib/private.h              |   2
-rw-r--r--  test/py/libvfio_user.py    |  28
-rw-r--r--  test/py/test_migration.py  | 143
6 files changed, 256 insertions(+), 20 deletions(-)
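For illustration, here is a minimal sketch of how a device might use the new deferred flow. It assumes the v1 migration transition callback signature (vfu_ctx_t *, vfu_migr_state_t); device_start_quiesce(), on_device_quiesced() and the pending_ctx bookkeeping are hypothetical device-side helpers, not part of libvfio-user. Only vfu_migr_done() and the -1/EBUSY contract come from this change.

#include <errno.h>
#include "libvfio-user.h"

void device_start_quiesce(void);        /* hypothetical device-specific helper */

static vfu_ctx_t *pending_ctx;          /* context whose transition was deferred */

static int
device_state_cb(vfu_ctx_t *vfu_ctx, vfu_migr_state_t state)
{
    if (state == VFU_MIGR_STATE_STOP_AND_COPY) {
        device_start_quiesce();         /* kick off asynchronous work */
        pending_ctx = vfu_ctx;
        errno = EBUSY;                  /* defer: libvfio-user sends no reply yet */
        return -1;
    }
    return 0;                           /* synchronous completion */
}

/* Called by the device once its asynchronous work has finished. */
static void
on_device_quiesced(int err)
{
    vfu_migr_done(pending_ctx, err);    /* sends the deferred reply to the client */
    pending_ctx = NULL;
}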
diff --git a/include/libvfio-user.h b/include/libvfio-user.h
index 8a4bfe7..de215ff 100644
--- a/include/libvfio-user.h
+++ b/include/libvfio-user.h
@@ -503,6 +503,17 @@ typedef struct {
*
* The callback should return -1 on error, setting errno.
*
+ * When operating in non-blocking mode (LIBVFIO_USER_FLAG_ATTACH_NB was
+ * passed to vfu_create_ctx) and -1 is returned with errno set to EBUSY,
+ * transitioning to the new state becomes asynchronous: libvfio-user does
+ * not send a response to the client and does not process any new messages.
+ * Transitioning to the new device state is completed by calling
+ * vfu_migr_done. This behavior can be beneficial for devices whose
+ * threading model does not allow blocking.
+ *
+ * While a state transition is pending, the user must not call vfu_dma_read
+ * or vfu_dma_write; doing so results in undefined behavior.
+ *
* TODO rename to vfu_migration_state_transition_callback
* FIXME maybe we should create a single callback and pass the state?
*/
@@ -569,6 +580,17 @@ typedef struct {
} vfu_migration_callbacks_t;
+/*
+ * Completes a pending migration state transition. Calling this function when
+ * there is no pending migration state transition results in undefined
+ * behavior.
+ *
+ * @vfu_ctx: the libvfio-user context
+ * @reply_errno: 0 for success or errno on error.
+ */
+void
+vfu_migr_done(vfu_ctx_t *vfu_ctx, int reply_errno);
+
#ifndef VFIO_DEVICE_STATE_STOP
diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c
index 25b58e0..a56d34c 100644
--- a/lib/libvfio-user.c
+++ b/lib/libvfio-user.c
@@ -335,7 +335,18 @@ handle_region_access(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg)
ret = region_access(vfu_ctx, in_ra->region, buf, in_ra->count,
in_ra->offset, msg->hdr.cmd == VFIO_USER_REGION_WRITE);
-
+ if (ret == -1 && in_ra->region == VFU_PCI_DEV_MIGR_REGION_IDX
+ && errno == EBUSY && (vfu_ctx->flags & LIBVFIO_USER_FLAG_ATTACH_NB)) {
+ /*
+ * We don't support async behavior in blocking mode simply because we
+ * don't have a use case yet: the only user of migration is SPDK, and it
+ * operates in non-blocking mode. We don't know the implications of
+ * enabling this in blocking mode as we haven't looked at the details.
+ */
+ vfu_ctx->migr_trans_pending = true;
+ return 0;
+ }
if (ret != in_ra->count) {
vfu_log(vfu_ctx, LOG_ERR, "failed to %s %#lx-%#lx: %m",
msg->hdr.cmd == VFIO_USER_REGION_WRITE ? "write" : "read",
@@ -1209,6 +1220,29 @@ exec_command(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg)
return ret;
}
+static int
+do_reply(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg, int reply_errno)
+{
+ assert(vfu_ctx != NULL);
+ assert(msg != NULL);
+
+ int ret = vfu_ctx->tran->reply(vfu_ctx, msg, reply_errno);
+
+ if (ret < 0) {
+ vfu_log(vfu_ctx, LOG_ERR, "failed to reply: %m");
+
+ if (errno == ECONNRESET) {
+ vfu_reset_ctx(vfu_ctx, "reset");
+ errno = ENOTCONN;
+ } else if (errno == ENOMSG) {
+ vfu_reset_ctx(vfu_ctx, "closed");
+ errno = ENOTCONN;
+ }
+ }
+
+ return ret;
+}
+
/*
* Handle requests over the vfio-user socket. This can return immediately if we
* are non-blocking, and there is no request from the client ready to read from
@@ -1257,25 +1291,19 @@ process_request(vfu_ctx_t *vfu_ctx)
}
out:
+ if (vfu_ctx->migr_trans_pending) {
+ assert(ret == 0);
+ vfu_ctx->migr_trans_msg = msg;
+ /* NB the message is freed in vfu_migr_done */
+ return 0;
+ }
if (msg->hdr.flags.no_reply) {
/*
* A failed client request is not a failure of process_request() itself.
*/
ret = 0;
} else {
- ret = vfu_ctx->tran->reply(vfu_ctx, msg, ret == 0 ? 0 : errno);
-
- if (ret < 0) {
- vfu_log(vfu_ctx, LOG_ERR, "failed to reply: %m");
-
- if (errno == ECONNRESET) {
- vfu_reset_ctx(vfu_ctx, "reset");
- errno = ENOTCONN;
- } else if (errno == ENOMSG) {
- vfu_reset_ctx(vfu_ctx, "closed");
- errno = ENOTCONN;
- }
- }
+ ret = do_reply(vfu_ctx, msg, ret == 0 ? 0 : errno);
}
free_msg(vfu_ctx, msg);
@@ -1376,6 +1404,9 @@ vfu_run_ctx(vfu_ctx_t *vfu_ctx)
blocking = !(vfu_ctx->flags & LIBVFIO_USER_FLAG_ATTACH_NB);
do {
+ if (vfu_ctx->migr_trans_pending) {
+ return ERROR_INT(EBUSY);
+ }
err = process_request(vfu_ctx);
if (err == 0) {
@@ -1928,12 +1959,14 @@ vfu_dma_transfer(vfu_ctx_t *vfu_ctx, enum vfio_user_command cmd,
EXPORT int
vfu_dma_read(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data)
{
+ assert(!vfu_ctx->migr_trans_pending);
return vfu_dma_transfer(vfu_ctx, VFIO_USER_DMA_READ, sg, data);
}
EXPORT int
vfu_dma_write(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data)
{
+ assert(!vfu_ctx->migr_trans_pending);
return vfu_dma_transfer(vfu_ctx, VFIO_USER_DMA_WRITE, sg, data);
}
@@ -1943,4 +1976,19 @@ vfu_sg_is_mappable(vfu_ctx_t *vfu_ctx, dma_sg_t *sg)
return dma_sg_is_mappable(vfu_ctx->dma, sg);
}
+EXPORT void
+vfu_migr_done(vfu_ctx_t *vfu_ctx, int reply_errno)
+{
+ assert(vfu_ctx != NULL);
+ assert(vfu_ctx->migr_trans_pending);
+
+ if (!vfu_ctx->migr_trans_msg->hdr.flags.no_reply) {
+ do_reply(vfu_ctx, vfu_ctx->migr_trans_msg, reply_errno);
+ }
+ free_msg(vfu_ctx, vfu_ctx->migr_trans_msg);
+ vfu_ctx->migr_trans_msg = NULL;
+
+ vfu_ctx->migr_trans_pending = false;
+}
+
/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */
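On the vfu_run_ctx() side, a rough sketch of how a non-blocking server loop might react to the new EBUSY return is shown below; do_device_work() is a hypothetical stand-in for the application's own processing, and handling of other vfu_run_ctx() errors is elided.

#include <errno.h>
#include "libvfio-user.h"

void do_device_work(void);              /* hypothetical application work */

void
serve(vfu_ctx_t *vfu_ctx)
{
    for (;;) {
        if (vfu_run_ctx(vfu_ctx) == -1 && errno == EBUSY) {
            /*
             * A migration state transition was deferred by the transition
             * callback: no new vfio-user messages are processed until the
             * device calls vfu_migr_done(), so only drive the device here.
             */
        }
        /* handling of other vfu_run_ctx() errors elided */
        do_device_work();
    }
}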
diff --git a/lib/migration.c b/lib/migration.c
index c1fe0b5..c8b97fa 100644
--- a/lib/migration.c
+++ b/lib/migration.c
@@ -423,6 +423,11 @@ MOCK_DEFINE(migration_region_access_registers)(vfu_ctx_t *vfu_ctx, char *buf,
"migration: transition from state %s to state %s",
migr_states[old_device_state].name,
migr_states[*device_state].name);
+ } else if (errno == EBUSY) {
+ vfu_log(vfu_ctx, LOG_DEBUG,
+ "migration: transition from state %s to state %s deferred",
+ migr_states[old_device_state].name,
+ migr_states[*device_state].name);
} else {
vfu_log(vfu_ctx, LOG_ERR,
"migration: failed to transition from state %s to state %s",
diff --git a/lib/private.h b/lib/private.h
index 93a354b..05d2fa4 100644
--- a/lib/private.h
+++ b/lib/private.h
@@ -169,6 +169,8 @@ struct vfu_ctx {
size_t client_max_data_xfer_size;
struct migration *migration;
+ bool migr_trans_pending;
+ vfu_msg_t *migr_trans_msg;
uint32_t irq_count[VFU_DEV_NUM_IRQS];
vfu_irqs_t *irqs;
diff --git a/test/py/libvfio_user.py b/test/py/libvfio_user.py
index 1ad5db5..5cba324 100644
--- a/test/py/libvfio_user.py
+++ b/test/py/libvfio_user.py
@@ -100,6 +100,13 @@ VFIO_IRQ_SET_ACTION_TRIGGER = (1 << 5)
VFIO_DMA_UNMAP_FLAG_ALL = (1 << 1)
+VFIO_DEVICE_STATE_STOP = (0)
+VFIO_DEVICE_STATE_RUNNING = (1 << 0)
+VFIO_DEVICE_STATE_SAVING = (1 << 1)
+VFIO_DEVICE_STATE_RESUMING = (1 << 2)
+VFIO_DEVICE_STATE_MASK = ((1 << 3) - 1)
+
+
# libvfio-user defines
VFU_TRANS_SOCK = 0
@@ -433,7 +440,7 @@ class vfio_user_bitmap_range(Structure):
("bitmap", vfio_user_bitmap)
]
-transition_cb_t = c.CFUNCTYPE(c.c_int, c.c_void_p, c.c_int)
+transition_cb_t = c.CFUNCTYPE(c.c_int, c.c_void_p, c.c_int, use_errno=True)
get_pending_bytes_cb_t = c.CFUNCTYPE(c.c_uint64, c.c_void_p)
prepare_data_cb_t = c.CFUNCTYPE(c.c_void_p, c.POINTER(c.c_uint64),
c.POINTER(c.c_uint64))
@@ -513,6 +520,8 @@ lib.vfu_create_ioeventfd.argtypes = (c.c_void_p, c.c_uint32, c.c_int,
c.c_size_t, c.c_uint32, c.c_uint32,
c.c_uint64)
+lib.vfu_migr_done.argtypes = (c.c_void_p, c.c_int)
+
def to_byte(val):
"""Cast an int to a byte value."""
@@ -563,7 +572,7 @@ def get_reply(sock, expect=0):
assert errno == expect
return buf[16:]
-def msg(ctx, sock, cmd, payload, expect=0, fds=None):
+def msg(ctx, sock, cmd, payload, expect=0, fds=None, rsp=True):
"""Round trip a request and reply to the server."""
hdr = vfio_user_header(cmd, size=len(payload))
@@ -576,6 +585,8 @@ def msg(ctx, sock, cmd, payload, expect=0, fds=None):
ret = vfu_run_ctx(ctx)
assert ret >= 0
+ if not rsp:
+ return
return get_reply(sock, expect=expect)
def get_reply_fds(sock, expect=0):
@@ -643,7 +654,7 @@ def write_pci_cfg_space(ctx, buf, count, offset, extended=False):
return count
def access_region(ctx, sock, is_write, region, offset, count,
- data=None, expect=0):
+ data=None, expect=0, rsp=True):
# struct vfio_user_region_access
payload = struct.pack("QII", offset, region, count)
if is_write:
@@ -651,15 +662,16 @@ def access_region(ctx, sock, is_write, region, offset, count,
cmd = VFIO_USER_REGION_WRITE if is_write else VFIO_USER_REGION_READ
- result = msg(ctx, sock, cmd, payload, expect=expect)
+ result = msg(ctx, sock, cmd, payload, expect=expect, rsp=rsp)
if is_write:
return None
return skip("QII", result)
-def write_region(ctx, sock, region, offset, count, data, expect=0):
- access_region(ctx, sock, True, region, offset, count, data, expect=expect)
+def write_region(ctx, sock, region, offset, count, data, expect=0, rsp=True):
+ access_region(ctx, sock, True, region, offset, count, data, expect=expect,
+ rsp=rsp)
def read_region(ctx, sock, region, offset, count, expect=0):
return access_region(ctx, sock, False, region, offset, count, expect=expect)
@@ -858,4 +870,8 @@ def vfu_create_ioeventfd(ctx, region_idx, fd, offset, size, flags, datamatch):
assert ctx != None
return lib.vfu_create_ioeventfd(ctx, region_idx, fd, offset, size, flags, datamatch)
+
+def vfu_migr_done(ctx, err):
+ return lib.vfu_migr_done(ctx, err)
+
# ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: #
diff --git a/test/py/test_migration.py b/test/py/test_migration.py
new file mode 100644
index 0000000..04c35cc
--- /dev/null
+++ b/test/py/test_migration.py
@@ -0,0 +1,143 @@
+#
+# Copyright (c) 2021 Nutanix Inc. All rights reserved.
+#
+# Authors: Thanos Makatos <thanos@nutanix.com>
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Nutanix nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+# DAMAGE.
+#
+
+from libvfio_user import *
+import ctypes as c
+import errno
+
+ctx = None
+
+global trans_cb_err
+trans_cb_err = 0
+
+@transition_cb_t
+def trans_cb(ctx, state):
+ global trans_cb_err
+ if trans_cb_err != 0:
+ c.set_errno(trans_cb_err)
+ return -1
+ return 0
+
+
+def test_migration_setup():
+ global ctx, sock
+
+ ctx = vfu_create_ctx(flags=LIBVFIO_USER_FLAG_ATTACH_NB)
+ assert ctx != None
+
+ ret = vfu_setup_region(ctx, index=VFU_PCI_DEV_MIGR_REGION_IDX, size=0x2000,
+ flags=VFU_REGION_FLAG_RW)
+ assert ret == 0
+
+ @c.CFUNCTYPE(c.c_int)
+ def stub():
+ return 0
+
+ cbs = vfu_migration_callbacks_t()
+ cbs.version = VFU_MIGR_CALLBACKS_VERS
+ cbs.transition = trans_cb
+ cbs.get_pending_bytes = c.cast(stub, get_pending_bytes_cb_t)
+ cbs.prepare_data = c.cast(stub, prepare_data_cb_t)
+ cbs.read_data = c.cast(stub, read_data_cb_t)
+ cbs.write_data = c.cast(stub, write_data_cb_t)
+ cbs.data_written = c.cast(stub, data_written_cb_t)
+
+ ret = vfu_setup_device_migration_callbacks(ctx, cbs, offset=0x4000)
+ assert ret == 0
+
+ ret = vfu_realize_ctx(ctx)
+ assert ret == 0
+
+ sock = connect_client(ctx)
+
+
+def test_migration_trans_sync():
+
+ data = VFIO_DEVICE_STATE_SAVING.to_bytes(c.sizeof(c.c_int), 'little')
+ write_region(ctx, sock, VFU_PCI_DEV_MIGR_REGION_IDX, offset=0,
+ count=len(data), data=data)
+
+ ret = vfu_run_ctx(ctx)
+ assert ret == 0
+
+
+def test_migration_trans_sync_err():
+
+ global trans_cb_err
+ trans_cb_err = errno.EPERM
+
+ data = VFIO_DEVICE_STATE_SAVING.to_bytes(c.sizeof(c.c_int), 'little')
+ write_region(ctx, sock, VFU_PCI_DEV_MIGR_REGION_IDX, offset=0,
+ count=len(data), data=data, expect=errno.EPERM)
+
+ ret = vfu_run_ctx(ctx)
+ assert ret == 0
+
+
+def test_migration_trans_async():
+
+ global trans_cb_err
+ trans_cb_err = errno.EBUSY
+
+ data = VFIO_DEVICE_STATE_SAVING.to_bytes(c.sizeof(c.c_int), 'little')
+ write_region(ctx, sock, VFU_PCI_DEV_MIGR_REGION_IDX, offset=0,
+ count=len(data), data=data, rsp=False)
+
+ ret = vfu_run_ctx(ctx)
+ assert ret == -1
+ assert c.get_errno() == errno.EBUSY
+
+ vfu_migr_done(ctx, 0)
+
+ get_reply(sock)
+
+ ret = vfu_run_ctx(ctx)
+ assert ret == 0
+
+
+def test_migration_trans_async_err():
+
+ global trans_cb_err
+ trans_cb_err = errno.EBUSY
+
+ data = VFIO_DEVICE_STATE_RUNNING.to_bytes(c.sizeof(c.c_int), 'little')
+ write_region(ctx, sock, VFU_PCI_DEV_MIGR_REGION_IDX, offset=0,
+ count=len(data), data=data, rsp=False)
+
+ ret = vfu_run_ctx(ctx)
+ assert ret == -1
+ assert c.get_errno() == errno.EBUSY
+
+ vfu_migr_done(ctx, errno.ENOTTY)
+
+ get_reply(sock, errno.ENOTTY)
+
+ vfu_destroy_ctx(ctx)
+
+# ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: #