/* * Copyright (c) 2019 Nutanix Inc. All rights reserved. * * Authors: Thanos Makatos * Swapnil Ingle * Felipe Franciosi * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Nutanix nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * */ /* * Defines the libvfio-user server-side API. The protocol definitions can be * found in vfio-user.h. * * This is not currently a stable API or ABI, and may change at any time. * Library calls are not guaranteed thread-safe: multi-threaded consumers need * to protect calls with their own exclusion methods. */ #ifndef LIB_VFIO_USER_H #define LIB_VFIO_USER_H #include #include #include #include #include #include "pci_caps/dsn.h" #include "pci_caps/msi.h" #include "pci_caps/msix.h" #include "pci_caps/pm.h" #include "pci_caps/px.h" #include "pci_defs.h" #include "vfio-user.h" #ifdef __cplusplus extern "C" { #endif #define LIB_VFIO_USER_MAJOR 0 #define LIB_VFIO_USER_MINOR 2 /* DMA addresses cannot be directly de-referenced. */ typedef void *vfu_dma_addr_t; struct dma_sg; typedef struct dma_sg dma_sg_t; typedef struct vfu_ctx vfu_ctx_t; /* * Returns the size, in bytes, of dma_sg_t. */ size_t dma_sg_size(void); /* * Attaching to the transport is non-blocking. * The caller must then manually call vfu_attach_ctx(), * which is non-blocking, as many times as necessary. * * This also applies to vfu_run_ctx(). However, it's presumed that any actual * reads or writes of the socket connection will not need to block, since both * APIS are synchronous. */ #define LIBVFIO_USER_FLAG_ATTACH_NB (1 << 0) typedef enum { VFU_TRANS_SOCK, // For internal testing only VFU_TRANS_PIPE, VFU_TRANS_MAX } vfu_trans_t; typedef enum { VFU_DEV_TYPE_PCI } vfu_dev_type_t; /** * Creates libvfio-user context. By default one ERR and one REQ IRQs are * initialized, this can be overridden with vfu_setup_device_nr_irqs. * * @trans: transport type * @path: path to socket file. * @flags: context flags (LIBVFIO_USER_FLAG_*) * @pvt: private data * @dev_type: device type * * @returns the vfu_ctx to be used or NULL on error. Sets errno. */ vfu_ctx_t * vfu_create_ctx(vfu_trans_t trans, const char *path, int flags, void *pvt, vfu_dev_type_t dev_type); /* * Finalizes the device making it ready for vfu_attach_ctx(). This function is * mandatory to be called before vfu_attach_ctx(). * @vfu_ctx: the libvfio-user context * * @returns: 0 on success, -1 on error. Sets errno. */ int vfu_realize_ctx(vfu_ctx_t *vfu_ctx); /* * Attempts to attach to the transport. Attach is mandatory before vfu_run_ctx() * and is non blocking if context is created with LIBVFIO_USER_FLAG_ATTACH_NB * flag. * * @returns: 0 on success, -1 on error. Sets errno. If errno is set to EAGAIN * or EWOULDBLOCK then the transport is not ready to attach to and the operation * must be retried. * * @vfu_ctx: the libvfio-user context */ int vfu_attach_ctx(vfu_ctx_t *vfu_ctx); /** * Return a file descriptor suitable for waiting on via epoll() or similar. The * file descriptor may change after a successful vfu_attach_ctx(), or on * receiving ENOTCONN error message from vfu_run_ctx(); in those cases, * vfu_get_poll_fd() should be called again to get the current correct file * descriptor. */ int vfu_get_poll_fd(vfu_ctx_t *vfu_ctx); /** * Polls the vfu_ctx and processes the command received from client. * - Blocking vfu_ctx: * Blocks until new request is received from client and continues processing * the requests. Exits only in case of error or if the client disconnects. * - Non-blocking vfu_ctx(LIBVFIO_USER_FLAG_ATTACH_NB): * Processes one request from client if it's available, otherwise it * immediately returns and the caller is responsible for periodically * calling again. * * @vfu_ctx: The libvfio-user context to poll * * @returns the number of requests processed (0 or more); or -1 on error, * with errno set as follows: * * ENOTCONN: client closed connection, vfu_attach_ctx() should be called again * EBUSY: the device was asked to quiesce and is still quiescing * Other errno values are also possible. */ int vfu_run_ctx(vfu_ctx_t *vfu_ctx); /** * Destroys libvfio-user context. During this call the device must already be * in quiesced state; the quiesce callback is not called. Any other device * callback can be called. * * @vfu_ctx: the libvfio-user context to destroy */ void vfu_destroy_ctx(vfu_ctx_t *vfu_ctx); /** * Return the private pointer given to vfu_create_ctx(). */ void * vfu_get_private(vfu_ctx_t *vfu_ctx); /** * Callback function signature for log function * @vfu_ctx: the libvfio-user context * @level: log level as defined in syslog(3) * @vfu_log_fn_t: typedef for log function. * @msg: message */ typedef void (vfu_log_fn_t)(vfu_ctx_t *vfu_ctx, int level, const char *msg); /** * Log to the logging function configured for this context. The format should * not include a new line. */ void vfu_log(vfu_ctx_t *vfu_ctx, int level, const char *fmt, ...) \ __attribute__((format(printf, 3, 4))); /** * Set up logging information. * @vfu_ctx: the libvfio-user context * @log: logging function * @level: logging level as defined in syslog(3) * * The log handler is expected to add a newline (that is, log messages do not * include a newline). */ int vfu_setup_log(vfu_ctx_t *vfu_ctx, vfu_log_fn_t *log, int level); /** * Prototype for region access callback. When a region is accessed, libvfio-user * calls the previously registered callback with the following arguments: * * @vfu_ctx: the libvfio-user context * @buf: buffer containing the data to be written or data to be read into * @count: number of bytes being read or written * @offset: byte offset within the region * @is_write: whether or not this is a write * * @returns the number of bytes read or written, or -1 on error, setting errno. */ typedef ssize_t (vfu_region_access_cb_t)(vfu_ctx_t *vfu_ctx, char *buf, size_t count, loff_t offset, bool is_write); #define VFU_REGION_FLAG_READ (1 << 0) #define VFU_REGION_FLAG_WRITE (1 << 1) #define VFU_REGION_FLAG_RW (VFU_REGION_FLAG_READ | VFU_REGION_FLAG_WRITE) /* If unset, this is an IO region. */ #define VFU_REGION_FLAG_MEM (1 << 2) #define VFU_REGION_FLAG_ALWAYS_CB (1 << 3) #define VFU_REGION_FLAG_64_BITS (1 << 4) #define VFU_REGION_FLAG_PREFETCH (1 << 5) #define VFU_REGION_FLAG_MASK (VFU_REGION_FLAG_RW | VFU_REGION_FLAG_MEM | \ VFU_REGION_FLAG_ALWAYS_CB | VFU_REGION_FLAG_64_BITS | \ VFU_REGION_FLAG_PREFETCH) /** * Set up a device region. * * A region is an area of device memory that can be accessed by the client, * either via VFIO_USER_REGION_READ/WRITE, or directly by mapping the region * into the client's address space if an fd is given. * * A mappable region can be split into mappable sub-areas according to the * @mmap_areas array. Note that the client can memory map any part of the * file descriptor, even if not supposed to do so according to @mmap_areas. * There is no way in Linux to avoid this. * * TODO maybe we should introduce per-sparse region file descriptors so that * the client cannot possibly memory map areas it's not supposed to. Even if * the client needs to have region under the same backing file, it is possible * to create linear device-mapper targets, one for each area, and provide file * descriptors of these DM targets. This is something we can document and * demonstrate in a sample. * * Areas that are accessed via such a mapping by definition do not invoke any * given callback. However, the callback can still be invoked, even on a * mappable area, if the client chooses to call VFIO_USER_REGION_READ/WRITE. * * The following regions are special and are explained below: * - VFU_PCI_DEV_CFG_REGION_IDX, * - VFU_PCI_DEV_MIGR_REGION_IDX, and * - VFU_GENERIC_DEV_MIGR_REG_IDX. * * Region VFU_PCI_DEV_CFG_REGION_IDX, corresponding to PCI config space, has * special handling: * * - the @size argument is ignored: the region size is always the size defined * by the relevant PCI specification * - all accesses to the standard PCI header (i.e. the first 64 bytes of the * region) are handled by the library * - all accesses to known PCI capabilities (see vfu_pci_add_capability()) * are handled by the library * - if no callback is provided, reads to other areas are a simple memcpy(), * and writes are an error * - otherwise, the callback is expected to handle the access * - if VFU_REGION_FLAG_ALWAYS_CB flag is set, all accesses to the config * space are forwarded to the callback * * Regions VFU_PCI_DEV_MIGR_REGION_IDX and VFU_GENERIC_DEV_MIGR_REG_IDX, * corresponding to the migration region, enable live migration support for * the device. The migration region must contain at the beginning the migration * registers (struct vfio_user_migration_info) and the remaining part of the * region can be arbitrarily used by the device implementation. The region * provided must have at least vfu_get_migr_register_area_size() bytes available * at the start of the region (this size is guaranteed to be page-aligned). If * mmap_areas is given, it must _not_ include this part of the region. * * libvfio-user offers two ways for the migration region to be used: * 1. natively: the device implementation must handle accesses to the * migration registers and migration data via the region callbacks. The * semantics of these registers are explained in . * 2. via the vfu_migration_callbacks_t callbacks: the device implementation * registers a set of callbacks by calling vfu_setup_device_migration. * The region's read/write callbacks are never called. * * @vfu_ctx: the libvfio-user context * @region_idx: region index * @size: size of the region * @region_access: callback function to access region * @flags: region flags (VFU_REGION_FLAG_*) * @mmap_areas: array of memory mappable areas; if an fd is provided, but this * is NULL, then the entire region is mappable. * @nr_mmap_areas: number of sparse areas in @mmap_areas; must be provided if * the @mmap_areas is non-NULL, or 0 otherwise. * @fd: file descriptor of the file backing the region if the region is * mappable; it is the server's responsibility to create a file suitable for * memory mapping by the client. * @offset: offset of the region within the fd, or zero. * * @returns 0 on success, -1 on error, Sets errno. */ int vfu_setup_region(vfu_ctx_t *vfu_ctx, int region_idx, size_t size, vfu_region_access_cb_t *region_access, int flags, struct iovec *mmap_areas, uint32_t nr_mmap_areas, int fd, uint64_t offset); typedef enum vfu_reset_type { /* * Client requested a device reset (for example, as part of a guest VM * reboot). The vfio-user context remains valid, but it's expected that all * ongoing operations are completed or cancelled, and any device state is * reset to a known-good initial state (including any PCI register state). */ VFU_RESET_DEVICE, /* * The vfio-user socket client connection was closed or reset. The attached * context is cleaned up after returning from the reset callback, and * vfu_attach_ctx() must be called to establish a new client. */ VFU_RESET_LOST_CONN, /* * Client requested to initiate PCI function level reset. */ VFU_RESET_PCI_FLR } vfu_reset_type_t; /* * Device callback for quiescing the device. * * vfu_run_ctx uses this callback to request from the device to quiesce its * operation. A quiesced device must not call vfu_addr_to_sgl() or vfu_sgl_*(), * unless it does so from a device callback. * * The callback can return two values: * 1) 0: this indicates that the device was quiesced. vfu_run_ctx then continues * to execute and when vfu_run_ctx returns to the caller the device is * unquiesced. * 2) -1 with errno set to EBUSY: this indicates that the device cannot * immediately quiesce. In this case, vfu_run_ctx returns -1 with errno * set to EBUSY and future calls to vfu_run_ctx return the same. Until the * device quiesces it can continue operate as normal. The device indicates * that it quiesced by calling vfu_device_quiesced. When * vfu_device_quiesced returns the device is no longer quiesced. * * A quiesced device should expect for any of the following callbacks to be * executed: vfu_dma_register_cb_t, vfu_unregister_cb_t, vfu_reset_cb_t, and * the migration transition callback. These callbacks are only called after the * device has been quiesced. * * The following example demonstrates how a device can use the SG routines and * friends while quiesced: * * A DMA region is mapped, libvfio-user calls the quiesce callback but the * device cannot immediately quiesce: * * int quiesce_cb(vfu_ctx_t *vfu_ctx) { * errno = EBUSY; * return -1; * } * * While quiescing, the device can continue to operate as normal, including * calling functions such as vfu_sgl_get(). Then, the device finishes quiescing: * * vfu_quiesce_done(vfu_ctx, 0); * * At this point, the device must have stopped using functions like * vfu_sgl_get(), for example by pausing any I/O threads. libvfio-user * eventually calls the dma_register device callback before vfu_quiesce_done * returns. In this callback the device is allowed to call functions such as * vfu_sgl_get() * * void (dma_register_cb(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info) { * vfu_sgl_get(ctx, ...); * } * * Once vfu_quiesce_done returns, the device is unquiesced. * * * @vfu_ctx: the libvfio-user context * * @returns: 0 on success, -1 on failure with errno set. */ typedef int (vfu_device_quiesce_cb_t)(vfu_ctx_t *vfu_ctx); /** * Sets up the device quiesce callback. * * @vfu_ctx: the libvfio-user context * @quiesce_cb: device quiesce callback */ void vfu_setup_device_quiesce_cb(vfu_ctx_t *vfu_ctx, vfu_device_quiesce_cb_t *quiesce_cb); /* * Called by the device to complete a pending quiesce operation. After the * function returns the device is unquiesced. * * @vfu_ctx: the libvfio-user context * @quiesce_errno: 0 for success or errno in case the device fails to quiesce, * in which case the operation requiring the quiesce is failed * and the device is reset. * * @returns 0 on success, or -1 on failure. Sets errno. */ int vfu_device_quiesced(vfu_ctx_t *vfu_ctx, int quiesce_errno); /* * Callback function that is called when the device must be reset. */ typedef int (vfu_reset_cb_t)(vfu_ctx_t *vfu_ctx, vfu_reset_type_t type); /** * Set up device reset callback. * * A reset should ensure that all on-going use of device IRQs or guest memory is * completed or cancelled before returning from the callback. * * @vfu_ctx: the libvfio-user context * @reset: device reset callback */ int vfu_setup_device_reset_cb(vfu_ctx_t *vfu_ctx, vfu_reset_cb_t *reset); /* * Info for a guest DMA region. @iova is always valid; the other parameters * will only be set if the guest DMA region is mappable. * * @iova: guest DMA range. This is the guest physical range (as we don't * support vIOMMU) that the guest registers for DMA, via a VFIO_USER_DMA_MAP * message, and is the address space used as input to vfu_addr_to_sgl(). * @vaddr: if the range is mapped into this process, this is the virtual address * of the start of the region. * @mapping: if @vaddr is non-NULL, this range represents the actual range * mmap()ed into the process. This might be (large) page aligned, and * therefore be different from @vaddr + @iova.iov_len. * @page_size: if @vaddr is non-NULL, page size of the mapping (e.g. 2MB) * @prot: if @vaddr is non-NULL, protection settings of the mapping as per * mmap(2) * * For a real example, using the gpio sample server, and a qemu configured to * use huge pages and share its memory: * * gpio: mapped DMA region iova=[0xf0000-0x10000000) vaddr=0x2aaaab0f0000 * page_size=0x200000 mapping=[0x2aaaab000000-0x2aaabb000000) * * 0xf0000 0x10000000 * | | * v v * +-----------------------------------+ * | Guest IOVA (DMA) space | * +--+-----------------------------------+--+ * | | | | * | +-----------------------------------+ | * | ^ libvfio-user server address space | * +--|--------------------------------------+ * ^ vaddr=0x2aaaab0f0000 ^ * | | * 0x2aaaab000000 0x2aaabb000000 * * This region can be directly accessed at 0x2aaaab0f0000, but the underlying * large page mapping is in the range [0x2aaaab000000-0x2aaabb000000). */ typedef struct vfu_dma_info { struct iovec iova; void *vaddr; struct iovec mapping; size_t page_size; uint32_t prot; } vfu_dma_info_t; /* * Called when a guest registers one of its DMA regions via a VFIO_USER_DMA_MAP * message. * * @vfu_ctx: the libvfio-user context * @info: the DMA info */ typedef void (vfu_dma_register_cb_t)(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info); /* * Function that is called when the guest unregisters a DMA region. This * callback is required if you want to be able to access guest memory directly * via a mapping. The device must release all references to that region before * the callback returns. * * @vfu_ctx: the libvfio-user context * @info: the DMA info */ typedef void (vfu_dma_unregister_cb_t)(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info); /** * Set up device DMA registration callbacks. When libvfio-user is notified of a * DMA range addition or removal, these callbacks will be invoked. * * If this function is not called, guest DMA regions are not accessible via * vfu_addr_to_sgl(). * * To directly access this DMA memory via a local mapping with vfu_sgl_get(), at * least @dma_unregister must be provided. * * @vfu_ctx: the libvfio-user context * @dma_register: DMA region registration callback (optional) * @dma_unregister: DMA region unregistration callback (optional) */ int vfu_setup_device_dma(vfu_ctx_t *vfu_ctx, vfu_dma_register_cb_t *dma_register, vfu_dma_unregister_cb_t *dma_unregister); enum vfu_dev_irq_type { VFU_DEV_INTX_IRQ, VFU_DEV_MSI_IRQ, VFU_DEV_MSIX_IRQ, VFU_DEV_ERR_IRQ, VFU_DEV_REQ_IRQ, VFU_DEV_NUM_IRQS }; /** * Set up device IRQ counts. * @vfu_ctx: the libvfio-user context * @type: IRQ type (VFU_DEV_INTX_IRQ ... VFU_DEV_REQ_IRQ) * @count: number of irqs */ int vfu_setup_device_nr_irqs(vfu_ctx_t *vfu_ctx, enum vfu_dev_irq_type type, uint32_t count); /* * Function that is called when the guest masks or unmasks an IRQ vector. * * @vfu_ctx: the libvfio-user context * @start: starting IRQ vector * @count: number of vectors * @mask: indicates if the IRQ is masked or unmasked */ typedef void (vfu_dev_irq_state_cb_t)(vfu_ctx_t *vfu_ctx, uint32_t start, uint32_t count, bool mask); /** * Set up IRQ state change callback. When libvfio-user is notified of a * change to IRQ state, whether masked or unmasked, it invokes * this callback. * * @vfu_ctx: the libvfio-user context * @type: IRQ type such as VFU_DEV_MSIX_IRQ - defined by vfu_dev_irq_type * @cb: IRQ state change callback * * @returns 0 on success, -1 on error, sets errno. */ int vfu_setup_irq_state_callback(vfu_ctx_t *vfu_ctx, enum vfu_dev_irq_type type, vfu_dev_irq_state_cb_t *cb); typedef enum { VFU_MIGR_STATE_STOP, VFU_MIGR_STATE_RUNNING, VFU_MIGR_STATE_STOP_AND_COPY, VFU_MIGR_STATE_PRE_COPY, VFU_MIGR_STATE_RESUME } vfu_migr_state_t; #define VFU_MIGR_CALLBACKS_VERS 2 typedef struct { /* * Set it to VFU_MIGR_CALLBACKS_VERS. */ int version; /* * Migration state transition callback. * * The callback should return -1 on error, setting errno. * * * TODO rename to vfu_migration_state_transition_callback * FIXME maybe we should create a single callback and pass the state? */ int (*transition)(vfu_ctx_t *vfu_ctx, vfu_migr_state_t state); /* * Function that is called to read `count` bytes of migration data into * `buf`. The function must return the amount of data read or -1 on error, * setting errno. The function may return less data than requested. * * If the function returns zero, this is interpreted to mean that there is * no more migration data to read. */ ssize_t (*read_data)(vfu_ctx_t *vfu_ctx, void *buf, uint64_t count); /* * Function that is called for writing previously stored device state. The * function must return the amount of data written or -1 on error, setting * errno. Partial writes are not supported, so any return value other than * `count` is invalid. */ ssize_t (*write_data)(vfu_ctx_t *vfu_ctx, void *buf, uint64_t count); } vfu_migration_callbacks_t; int vfu_setup_device_migration_callbacks(vfu_ctx_t *vfu_ctx, const vfu_migration_callbacks_t *callbacks); /** * Triggers an interrupt. * * libvfio-user takes care of using the correct IRQ type (IRQ index: INTx or * MSI/X), the caller only needs to specify the sub-index. * * @vfu_ctx: the libvfio-user context to trigger interrupt * @subindex: vector subindex to trigger interrupt on * * @returns 0 on success, or -1 on failure. Sets errno. */ int vfu_irq_trigger(vfu_ctx_t *vfu_ctx, uint32_t subindex); /** * Takes a guest physical address range and populates an array of scatter/gather * entries than can be individually mapped in the program's virtual memory. A * single linear guest physical address span may need to be split into multiple * scatter/gather regions due to limitations of how memory can be mapped. * * vfu_setup_device_dma() must have been called prior to using this function. * * @vfu_ctx: the libvfio-user context * @dma_addr: the guest physical address * @len: size of memory to be mapped * @sgl: array that receives the scatter/gather entries to be mapped * @max_nr_sgs: maximum number of elements in above array * @prot: protection as defined in * * @returns the number of scatter/gather entries created on success, and on * failure: * -1: if the GPA address span is invalid (errno=ENOENT) or * protection violation (errno=EACCES) * (-x - 1): if @max_nr_sgs is too small, where x is the number of SG * entries necessary to complete this request (errno=0). */ int vfu_addr_to_sgl(vfu_ctx_t *vfu_ctx, vfu_dma_addr_t dma_addr, size_t len, dma_sg_t *sgl, size_t max_nr_sgs, int prot); /** * Populate the given iovec array (accessible in the process's virtual memory), * based upon the SGL previously built via vfu_addr_to_sgl(). * It is the caller's responsibility to return the release the iovecs via * vfu_sgl_put(). * * This is only supported when a @dma_unregister callback is provided to * vfu_setup_device_dma(). * * @vfu_ctx: the libvfio-user context * @sgl: array of scatter/gather entries returned by vfu_addr_to_sg. These * entries must not be modified and the array must not be deallocated * until vfu_sgl_put() has been called. * @iov: array of iovec structures (defined in ) to receive each * mapping * @cnt: number of scatter/gather entries to map * @flags: must be 0 * * @returns 0 on success, -1 on failure. Sets errno. */ int vfu_sgl_get(vfu_ctx_t *vfu_ctx, dma_sg_t *sgl, struct iovec *iov, size_t cnt, int flags); /** * Mark scatter/gather entries (previously acquired via vfu_sgl_get()) * as dirty (written to). This is only necessary if vfu_sgl_put() is not called. * * @vfu_ctx: the libvfio-user context * @sg: array of scatter/gather entries to mark as dirty * @cnt: number of scatter/gather entries to mark as dirty */ void vfu_sgl_mark_dirty(vfu_ctx_t *vfu_ctx, dma_sg_t *sgl, size_t cnt); /** * Release the iovec array previously acquired by vfu_sgl_get(). * * This will automatically mark the sgl as dirty if needed. * * @vfu_ctx: the libvfio-user context * @sgl: array of scatter/gather entries to unmap * @iov: array of iovec structures for each scatter/gather entry * @cnt: number of scatter/gather entries to unmap */ void vfu_sgl_put(vfu_ctx_t *vfu_ctx, dma_sg_t *sgl, struct iovec *iov, size_t cnt); /** * Read from the dma region exposed by the client. This can be used as an * alternative to reading from a vfu_sgl_get() mapping, if the region is not * directly mappable, or DMA notification callbacks have not been provided. * * @vfu_ctx: the libvfio-user context * @sg: a DMA segment obtained from dma_addr_to_sg * @data: data buffer to read into * * @returns 0 on success, -1 on failure. Sets errno. */ int vfu_sgl_read(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, size_t cnt, void *data); /** * Write to the dma region exposed by the client. This can be used as an * alternative to reading from a vfu_sgl_get() mapping, if the region is not * directly mappable, or DMA notification callbacks have not been provided. * * During live migration, this call does not mark any of the written pages as * dirty; the client is expected to track this. * * @vfu_ctx: the libvfio-user context * @sg: a DMA segment obtained from dma_addr_to_sg * @data: data buffer to write * * @returns 0 on success, -1 on failure. Sets errno. */ int vfu_sgl_write(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, size_t cnt, void *data); /* * Supported PCI regions. * * Note: in VFIO, each region starts at a terabyte offset * (VFIO_PCI_INDEX_TO_OFFSET) and because Linux supports up to 128 TB of user * space virtual memory, there can be up to 128 device regions. PCI regions are * fixed and in retrospect this choice has proven to be problematic because * devices might contain potentially unused regions. New regions can now be * positioned anywhere by using the VFIO_REGION_INFO_CAP_TYPE capability. In * vfio-user we don't have this problem because the region index is just an * identifier: the VMM memory maps a file descriptor that is passed to it and * the mapping offset is derived from the mmap_areas offset value, rather than a * static mapping from region index to offset. Thus, additional regions can * have static indexes in vfio-user. */ enum { VFU_PCI_DEV_BAR0_REGION_IDX, VFU_PCI_DEV_BAR1_REGION_IDX, VFU_PCI_DEV_BAR2_REGION_IDX, VFU_PCI_DEV_BAR3_REGION_IDX, VFU_PCI_DEV_BAR4_REGION_IDX, VFU_PCI_DEV_BAR5_REGION_IDX, VFU_PCI_DEV_ROM_REGION_IDX, VFU_PCI_DEV_CFG_REGION_IDX, VFU_PCI_DEV_VGA_REGION_IDX, VFU_PCI_DEV_NUM_REGIONS, }; typedef enum { VFU_PCI_TYPE_CONVENTIONAL, VFU_PCI_TYPE_PCI_X_1, VFU_PCI_TYPE_PCI_X_2, VFU_PCI_TYPE_EXPRESS } vfu_pci_type_t; enum { VFU_GENERIC_DEV_MIGR_REGION_IDX, VFU_GENERIC_DEV_NUM_REGIONS }; /** * Initialize the context for a PCI device. This function must be called only * once per libvfio-user context. * * This function initializes a buffer for the PCI config space, accessible via * vfu_pci_get_config_space(). * * Returns 0 on success, or -1 on error, setting errno. * * @vfu_ctx: the libvfio-user context * @pci_type: PCI type (convention PCI, PCI-X mode 1, PCI-X mode2, PCI-Express) * @hdr_type: PCI header type. Only PCI_HEADER_TYPE_NORMAL is supported. * @revision: PCI/PCI-X/PCIe revision */ int vfu_pci_init(vfu_ctx_t *vfu_ctx, vfu_pci_type_t pci_type, int hdr_type, int revision); /* * Set the Vendor ID, Device ID, Subsystem Vendor ID, and Subsystem ID fields of * the PCI config header (PCI3 6.2.1, 6.2.4). * * This must always be called for PCI devices, after vfu_pci_init(). */ void vfu_pci_set_id(vfu_ctx_t *vfu_ctx, uint16_t vid, uint16_t did, uint16_t ssvid, uint16_t ssid); /* * Set the class code fields (base, sub-class, and programming interface) of the * PCI config header (PCI3 6.2.1). * * If this function is not called, the fields are initialized to zero. */ void vfu_pci_set_class(vfu_ctx_t *vfu_ctx, uint8_t base, uint8_t sub, uint8_t pi); /* * Returns a pointer to the PCI configuration space. * * PCI config space consists of an initial 64-byte vfu_pci_hdr_t, plus * additional space, containing capabilities and/or device-specific * configuration. Standard config space is 256 bytes (PCI_CFG_SPACE_SIZE); * extended config space is 4096 bytes (PCI_CFG_SPACE_EXP_SIZE). */ vfu_pci_config_space_t * vfu_pci_get_config_space(vfu_ctx_t *vfu_ctx); #define VFU_CAP_FLAG_EXTENDED (1 << 0) #define VFU_CAP_FLAG_CALLBACK (1 << 1) #define VFU_CAP_FLAG_READONLY (1 << 2) /** * Add a PCI capability to PCI config space. * * Certain standard capabilities are handled entirely within the library: * * PCI_CAP_ID_EXP (pxcap) * PCI_CAP_ID_MSI (msicap) * PCI_CAP_ID_MSIX (msixcap) * PCI_CAP_ID_PM (pmcap) * * However, they must still be explicitly initialized and added here. * * The contents of @data are copied in. It must start with either a struct * cap_hdr or a struct ext_cap_hdr, with the ID field set; the 'next' field is * ignored. For PCI_CAP_ID_VNDR or PCI_EXT_CAP_ID_VNDR, the embedded size field * must also be set; in general, any non-fixed-size capability must be * initialized such that the size can be derived at this point. * * If @pos is non-zero, the capability will be placed at the given offset within * configuration space. It must not overlap the PCI standard header, or any * existing capability. Note that if a capability is added "out of order" in * terms of the offset, there is no re-ordering of the capability list written * in configuration space. * * If @pos is zero, the capability will be placed at a suitable offset * automatically. * * The @flags field can be set as follows: * * VFU_CAP_FLAG_EXTENDED: this is an extended capability; supported if device is * of PCI type VFU_PCI_TYPE_{PCI_X_2,EXPRESS}. * * VFU_CAP_FLAG_CALLBACK: all accesses to the capability are delegated to the * callback for the region VFU_PCI_DEV_CFG_REGION_IDX. The callback should copy * data into and out of the capability as needed (this could be directly on the * config space area from vfu_pci_get_config_space()). It is not supported to * allow writes to the initial capability header (ID/next fields). * * VFU_CAP_FLAG_READONLY: this prevents clients from writing to the capability. * By default, clients are allowed to write to any part of the capability, * excluding the initial header. * * Returns the offset of the capability in config space, or -1 on error, with * errno set. * * @vfu_ctx: the libvfio-user context * @pos: specific offset for the capability, or 0. * @flags: VFU_CAP_FLAG_* * @data: capability data, including the header */ ssize_t vfu_pci_add_capability(vfu_ctx_t *vfu_ctx, size_t pos, int flags, void *data); /** * Find the offset within config space of a given capability (if there are * multiple possible matches, use vfu_pci_find_next_capability()). * * Returns 0 if no such capability was found, with errno set. * * @vfu_ctx: the libvfio-user context * @extended whether capability is an extended one or not * @id: capability id (PCI_CAP_ID_* or PCI_EXT_CAP_ID *) */ size_t vfu_pci_find_capability(vfu_ctx_t *vfu_ctx, bool extended, int cap_id); /** * Find the offset within config space of the given capability, starting from * @pos, which must be the valid offset of an existing capability. This can be * used to iterate through multiple capabilities with the same ID. * * Returns 0 if no more matching capabilities were found, with errno set. * * @vfu_ctx: the libvfio-user context * @extended whether capability is an extended one or not * @pos: offset within config space to start looking * @id: capability id (PCI_CAP_ID_*) */ size_t vfu_pci_find_next_capability(vfu_ctx_t *vfu_ctx, bool extended, size_t pos, int cap_id); bool vfu_sg_is_mappable(vfu_ctx_t *vfu_ctx, dma_sg_t *sg); /* * Creates a new ioeventfd at the given setup memory region with @offset, @size, * @fd, @flags and @datamatch. * * Returns 0 on success and -1 on failure with errno set. * * @vfu_ctx: the libvfio-user context * @region_idx: The index of the memory region to set up the ioeventfd * @fd: the value of the file descriptor * @gpa_offset: The offset into the memory region * @size: size of the ioeventfd * @flags: Any flags to set up the ioeventfd * @datamatch: sets the datamatch value * @shadow_fd: File descriptor that can be mmap'ed, KVM will write there the * otherwise discarded value when the ioeventfd is written to. If set to -1 * then a normal ioeventfd is set up instead of a shadow one. The vfio-user * client is free to ignore this, even if it supports shadow ioeventfds. * Requires a kernel with shadow ioeventfd support. * Experimental, must be compiled with SHADOW_IOEVENTFD defined, otherwise * must be -1. * @shadow_offset: offset in shadow memory where value is written to. */ int vfu_create_ioeventfd(vfu_ctx_t *vfu_ctx, uint32_t region_idx, int fd, size_t gpa_offset, uint32_t size, uint32_t flags, uint64_t datamatch, int shadow_fd, size_t shadow_offset); #ifdef __cplusplus } #endif #endif /* LIB_VFIO_USER_H */ /* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */