diff options
-rw-r--r-- | include/libvfio-user.h | 22 | ||||
-rw-r--r-- | lib/libvfio-user.c | 76 | ||||
-rw-r--r-- | lib/migration.c | 5 | ||||
-rw-r--r-- | lib/private.h | 2 | ||||
-rw-r--r-- | test/py/libvfio_user.py | 28 | ||||
-rw-r--r-- | test/py/test_migration.py | 143 |
6 files changed, 256 insertions, 20 deletions
diff --git a/include/libvfio-user.h b/include/libvfio-user.h index 8a4bfe7..de215ff 100644 --- a/include/libvfio-user.h +++ b/include/libvfio-user.h @@ -503,6 +503,17 @@ typedef struct { * * The callback should return -1 on error, setting errno. * + * When operating in non-blocking mode (LIBVFIO_USER_FLAG_ATTACH_NB was + * passed to vfu_create_ctx) and -1 is returned with errno set to EBUSY, + * transitioning to the new state becomes asynchronous: libvfio-user does + * not send a response to the client and does not process any new messages. + * Transitioning to the new device state is completed by calling + * vfu_migr_done. This behavior can be beneficial for devices whose + * threading model does not allow blocking. + * + * While a transition is pending, the user must not call vfu_dma_read or + * vfu_dma_write; doing so results in undefined behavior. + * * TODO rename to vfu_migration_state_transition_callback * FIXME maybe we should create a single callback and pass the state? */ @@ -569,6 +580,17 @@ typedef struct { } vfu_migration_callbacks_t; +/* + * Completes a pending migration state transition. Calling this function when + * there is no pending migration state transition results in undefined + * behavior. + * + * @vfu_ctx: the libvfio-user context + * @reply_errno: 0 for success or errno on error. 
+ */ +void +vfu_migr_done(vfu_ctx_t *vfu_ctx, int reply_errno); + #ifndef VFIO_DEVICE_STATE_STOP diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c index 25b58e0..a56d34c 100644 --- a/lib/libvfio-user.c +++ b/lib/libvfio-user.c @@ -335,7 +335,18 @@ handle_region_access(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) ret = region_access(vfu_ctx, in_ra->region, buf, in_ra->count, in_ra->offset, msg->hdr.cmd == VFIO_USER_REGION_WRITE); - + if (ret == -1 && in_ra->region == VFU_PCI_DEV_MIGR_REGION_IDX + && errno == EBUSY && (vfu_ctx->flags & LIBVFIO_USER_FLAG_ATTACH_NB)) { + /* + * We don't support async behavior for the blocking mode simply + * because we don't have a use case yet: the only user of migration + * is SPDK and it operates in non-blocking mode. We don't know the + * implications of enabling this in blocking mode as we haven't looked + * at the details. + */ + vfu_ctx->migr_trans_pending = true; + return 0; + } if (ret != in_ra->count) { vfu_log(vfu_ctx, LOG_ERR, "failed to %s %#lx-%#lx: %m", msg->hdr.cmd == VFIO_USER_REGION_WRITE ? "write" : "read", @@ -1209,6 +1220,29 @@ exec_command(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) return ret; } +static int +do_reply(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg, int reply_errno) +{ + assert(vfu_ctx != NULL); + assert(msg != NULL); + + int ret = vfu_ctx->tran->reply(vfu_ctx, msg, reply_errno); + + if (ret < 0) { + vfu_log(vfu_ctx, LOG_ERR, "failed to reply: %m"); + + if (errno == ECONNRESET) { + vfu_reset_ctx(vfu_ctx, "reset"); + errno = ENOTCONN; + } else if (errno == ENOMSG) { + vfu_reset_ctx(vfu_ctx, "closed"); + errno = ENOTCONN; + } + } + + return ret; +} + /* * Handle requests over the vfio-user socket. 
This can return immediately if we * are non-blocking, and there is no request from the client ready to read from @@ -1257,25 +1291,19 @@ process_request(vfu_ctx_t *vfu_ctx) } out: + if (vfu_ctx->migr_trans_pending) { + assert(ret == 0); + vfu_ctx->migr_trans_msg = msg; + /* NB the message is freed in vfu_migr_done */ + return 0; + } if (msg->hdr.flags.no_reply) { /* * A failed client request is not a failure of process_request() itself. */ ret = 0; } else { - ret = vfu_ctx->tran->reply(vfu_ctx, msg, ret == 0 ? 0 : errno); - - if (ret < 0) { - vfu_log(vfu_ctx, LOG_ERR, "failed to reply: %m"); - - if (errno == ECONNRESET) { - vfu_reset_ctx(vfu_ctx, "reset"); - errno = ENOTCONN; - } else if (errno == ENOMSG) { - vfu_reset_ctx(vfu_ctx, "closed"); - errno = ENOTCONN; - } - } + ret = do_reply(vfu_ctx, msg, ret == 0 ? 0 : errno); } free_msg(vfu_ctx, msg); @@ -1376,6 +1404,9 @@ vfu_run_ctx(vfu_ctx_t *vfu_ctx) blocking = !(vfu_ctx->flags & LIBVFIO_USER_FLAG_ATTACH_NB); do { + if (vfu_ctx->migr_trans_pending) { + return ERROR_INT(EBUSY); + } err = process_request(vfu_ctx); if (err == 0) { @@ -1928,12 +1959,14 @@ vfu_dma_transfer(vfu_ctx_t *vfu_ctx, enum vfio_user_command cmd, EXPORT int vfu_dma_read(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data) { + assert(!vfu_ctx->migr_trans_pending); return vfu_dma_transfer(vfu_ctx, VFIO_USER_DMA_READ, sg, data); } EXPORT int vfu_dma_write(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data) { + assert(!vfu_ctx->migr_trans_pending); return vfu_dma_transfer(vfu_ctx, VFIO_USER_DMA_WRITE, sg, data); } @@ -1943,4 +1976,19 @@ vfu_sg_is_mappable(vfu_ctx_t *vfu_ctx, dma_sg_t *sg) return dma_sg_is_mappable(vfu_ctx->dma, sg); } +EXPORT void +vfu_migr_done(vfu_ctx_t *vfu_ctx, int reply_errno) +{ + assert(vfu_ctx != NULL); + assert(vfu_ctx->migr_trans_pending); + + if (!vfu_ctx->migr_trans_msg->hdr.flags.no_reply) { + do_reply(vfu_ctx, vfu_ctx->migr_trans_msg, reply_errno); + } + free_msg(vfu_ctx, vfu_ctx->migr_trans_msg); + vfu_ctx->migr_trans_msg = NULL; 
+ + vfu_ctx->migr_trans_pending = false; +} + /* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lib/migration.c b/lib/migration.c index c1fe0b5..c8b97fa 100644 --- a/lib/migration.c +++ b/lib/migration.c @@ -423,6 +423,11 @@ MOCK_DEFINE(migration_region_access_registers)(vfu_ctx_t *vfu_ctx, char *buf, "migration: transition from state %s to state %s", migr_states[old_device_state].name, migr_states[*device_state].name); + } else if (errno == EBUSY) { + vfu_log(vfu_ctx, LOG_DEBUG, + "migration: transition from state %s to state %s deferred", + migr_states[old_device_state].name, + migr_states[*device_state].name); } else { vfu_log(vfu_ctx, LOG_ERR, "migration: failed to transition from state %s to state %s", diff --git a/lib/private.h b/lib/private.h index 93a354b..05d2fa4 100644 --- a/lib/private.h +++ b/lib/private.h @@ -169,6 +169,8 @@ struct vfu_ctx { size_t client_max_data_xfer_size; struct migration *migration; + bool migr_trans_pending; + vfu_msg_t *migr_trans_msg; uint32_t irq_count[VFU_DEV_NUM_IRQS]; vfu_irqs_t *irqs; diff --git a/test/py/libvfio_user.py b/test/py/libvfio_user.py index 1ad5db5..5cba324 100644 --- a/test/py/libvfio_user.py +++ b/test/py/libvfio_user.py @@ -100,6 +100,13 @@ VFIO_IRQ_SET_ACTION_TRIGGER = (1 << 5) VFIO_DMA_UNMAP_FLAG_ALL = (1 << 1) +VFIO_DEVICE_STATE_STOP = (0) +VFIO_DEVICE_STATE_RUNNING = (1 << 0) +VFIO_DEVICE_STATE_SAVING = (1 << 1) +VFIO_DEVICE_STATE_RESUMING = (1 << 2) +VFIO_DEVICE_STATE_MASK = ((1 << 3) - 1) + + # libvfio-user defines VFU_TRANS_SOCK = 0 @@ -433,7 +440,7 @@ class vfio_user_bitmap_range(Structure): ("bitmap", vfio_user_bitmap) ] -transition_cb_t = c.CFUNCTYPE(c.c_int, c.c_void_p, c.c_int) +transition_cb_t = c.CFUNCTYPE(c.c_int, c.c_void_p, c.c_int, use_errno=True) get_pending_bytes_cb_t = c.CFUNCTYPE(c.c_uint64, c.c_void_p) prepare_data_cb_t = c.CFUNCTYPE(c.c_void_p, c.POINTER(c.c_uint64), c.POINTER(c.c_uint64)) @@ -513,6 +520,8 @@ lib.vfu_create_ioeventfd.argtypes = (c.c_void_p, 
c.c_uint32, c.c_int, c.c_size_t, c.c_uint32, c.c_uint32, c.c_uint64) +lib.vfu_migr_done.argtypes = (c.c_void_p, c.c_int) + def to_byte(val): """Cast an int to a byte value.""" @@ -563,7 +572,7 @@ def get_reply(sock, expect=0): assert errno == expect return buf[16:] -def msg(ctx, sock, cmd, payload, expect=0, fds=None): +def msg(ctx, sock, cmd, payload, expect=0, fds=None, rsp=True): """Round trip a request and reply to the server.""" hdr = vfio_user_header(cmd, size=len(payload)) @@ -576,6 +585,8 @@ def msg(ctx, sock, cmd, payload, expect=0, fds=None): ret = vfu_run_ctx(ctx) assert ret >= 0 + if not rsp: + return return get_reply(sock, expect=expect) def get_reply_fds(sock, expect=0): @@ -643,7 +654,7 @@ def write_pci_cfg_space(ctx, buf, count, offset, extended=False): return count def access_region(ctx, sock, is_write, region, offset, count, - data=None, expect=0): + data=None, expect=0, rsp=True): # struct vfio_user_region_access payload = struct.pack("QII", offset, region, count) if is_write: @@ -651,15 +662,16 @@ def access_region(ctx, sock, is_write, region, offset, count, cmd = VFIO_USER_REGION_WRITE if is_write else VFIO_USER_REGION_READ - result = msg(ctx, sock, cmd, payload, expect=expect) + result = msg(ctx, sock, cmd, payload, expect=expect, rsp=rsp) if is_write: return None return skip("QII", result) -def write_region(ctx, sock, region, offset, count, data, expect=0): - access_region(ctx, sock, True, region, offset, count, data, expect=expect) +def write_region(ctx, sock, region, offset, count, data, expect=0, rsp=True): + access_region(ctx, sock, True, region, offset, count, data, expect=expect, + rsp=rsp) def read_region(ctx, sock, region, offset, count, expect=0): return access_region(ctx, sock, False, region, offset, count, expect=expect) @@ -858,4 +870,8 @@ def vfu_create_ioeventfd(ctx, region_idx, fd, offset, size, flags, datamatch): assert ctx != None return lib.vfu_create_ioeventfd(ctx, region_idx, fd, offset, size, flags, datamatch) + +def 
vfu_migr_done(ctx, err): + return lib.vfu_migr_done(ctx, err) + # ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: # diff --git a/test/py/test_migration.py b/test/py/test_migration.py new file mode 100644 index 0000000..04c35cc --- /dev/null +++ b/test/py/test_migration.py @@ -0,0 +1,143 @@ +# +# Copyright (c) 2021 Nutanix Inc. All rights reserved. +# +# Authors: Thanos Makatos <thanos@nutanix.com> +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of Nutanix nor the names of its contributors may be +# used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY +# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +# DAMAGE. 
+# + +from libvfio_user import * +import ctypes as c +import errno + +ctx = None + +global trans_cb_err +trans_cb_err = 0 + +@transition_cb_t +def trans_cb(ctx, state): + global trans_cb_err + if trans_cb_err != 0: + c.set_errno(trans_cb_err) + return -1 + return 0 + + +def test_migration_setup(): + global ctx, sock + + ctx = vfu_create_ctx(flags=LIBVFIO_USER_FLAG_ATTACH_NB) + assert ctx != None + + ret = vfu_setup_region(ctx, index=VFU_PCI_DEV_MIGR_REGION_IDX, size=0x2000, + flags=VFU_REGION_FLAG_RW) + assert ret == 0 + + @c.CFUNCTYPE(c.c_int) + def stub(): + return 0 + + cbs = vfu_migration_callbacks_t() + cbs.version = VFU_MIGR_CALLBACKS_VERS + cbs.transition = trans_cb + cbs.get_pending_bytes = c.cast(stub, get_pending_bytes_cb_t) + cbs.prepare_data = c.cast(stub, prepare_data_cb_t) + cbs.read_data = c.cast(stub, read_data_cb_t) + cbs.write_data = c.cast(stub, write_data_cb_t) + cbs.data_written = c.cast(stub, data_written_cb_t) + + ret = vfu_setup_device_migration_callbacks(ctx, cbs, offset=0x4000) + assert ret == 0 + + ret = vfu_realize_ctx(ctx) + assert ret == 0 + + sock = connect_client(ctx) + + +def test_migration_trans_sync(): + + data = VFIO_DEVICE_STATE_SAVING.to_bytes(c.sizeof(c.c_int), 'little') + write_region(ctx, sock, VFU_PCI_DEV_MIGR_REGION_IDX, offset=0, + count=len(data), data=data) + + ret = vfu_run_ctx(ctx) + assert ret == 0 + + +def test_migration_trans_sync_err(): + + global trans_cb_err + trans_cb_err = errno.EPERM + + data = VFIO_DEVICE_STATE_SAVING.to_bytes(c.sizeof(c.c_int), 'little') + write_region(ctx, sock, VFU_PCI_DEV_MIGR_REGION_IDX, offset=0, + count=len(data), data=data, expect=errno.EPERM) + + ret = vfu_run_ctx(ctx) + assert ret == 0 + + +def test_migration_trans_async(): + + global trans_cb_err + trans_cb_err = errno.EBUSY + + data = VFIO_DEVICE_STATE_SAVING.to_bytes(c.sizeof(c.c_int), 'little') + write_region(ctx, sock, VFU_PCI_DEV_MIGR_REGION_IDX, offset=0, + count=len(data), data=data, rsp=False) + + ret = vfu_run_ctx(ctx) + 
assert ret == -1 + assert c.get_errno() == errno.EBUSY + + vfu_migr_done(ctx, 0) + + get_reply(sock) + + ret = vfu_run_ctx(ctx) + assert ret == 0 + + +def test_migration_trans_async_err(): + + global trans_cb_err + trans_cb_err = errno.EBUSY + + data = VFIO_DEVICE_STATE_RUNNING.to_bytes(c.sizeof(c.c_int), 'little') + write_region(ctx, sock, VFU_PCI_DEV_MIGR_REGION_IDX, offset=0, + count=len(data), data=data, rsp=False) + + ret = vfu_run_ctx(ctx) + assert ret == -1 + assert c.get_errno() == errno.EBUSY + + vfu_migr_done(ctx, errno.ENOTTY) + + get_reply(sock, errno.ENOTTY) + + vfu_destroy_ctx(ctx) + +# ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: # |