diff options
Diffstat (limited to 'subprojects/libvhost-user/libvhost-user.h')
-rw-r--r-- | subprojects/libvhost-user/libvhost-user.h | 690 |
1 files changed, 690 insertions, 0 deletions
diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h new file mode 100644 index 0000000..7d47f13 --- /dev/null +++ b/subprojects/libvhost-user/libvhost-user.h @@ -0,0 +1,690 @@ +/* + * Vhost User library + * + * Copyright (c) 2016 Red Hat, Inc. + * + * Authors: + * Victor Kaplansky <victork@redhat.com> + * Marc-André Lureau <mlureau@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#ifndef LIBVHOST_USER_H +#define LIBVHOST_USER_H + +#include <stdint.h> +#include <stdbool.h> +#include <stddef.h> +#include <sys/poll.h> +#include <linux/vhost.h> +#include <pthread.h> +#include "standard-headers/linux/virtio_ring.h" + +/* Based on qemu/hw/virtio/vhost-user.c */ +#define VHOST_USER_F_PROTOCOL_FEATURES 30 +#define VHOST_LOG_PAGE 4096 + +#define VIRTQUEUE_MAX_SIZE 1024 + +#define VHOST_MEMORY_BASELINE_NREGIONS 8 + +/* + * Set a reasonable maximum number of ram slots, which will be supported by + * any architecture. + */ +#define VHOST_USER_MAX_RAM_SLOTS 32 + +#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64) + +typedef enum VhostSetConfigType { + VHOST_SET_CONFIG_TYPE_MASTER = 0, + VHOST_SET_CONFIG_TYPE_MIGRATION = 1, +} VhostSetConfigType; + +/* + * Maximum size of virtio device config space + */ +#define VHOST_USER_MAX_CONFIG_SIZE 256 + +enum VhostUserProtocolFeature { + VHOST_USER_PROTOCOL_F_MQ = 0, + VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, + VHOST_USER_PROTOCOL_F_RARP = 2, + VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, + VHOST_USER_PROTOCOL_F_NET_MTU = 4, + VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5, + VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, + VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, + VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, + VHOST_USER_PROTOCOL_F_CONFIG = 9, + VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10, + VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, + VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, + VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14, + VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, + + VHOST_USER_PROTOCOL_F_MAX +}; + +#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) + +typedef enum VhostUserRequest { + VHOST_USER_NONE = 0, + VHOST_USER_GET_FEATURES = 1, + VHOST_USER_SET_FEATURES = 2, + VHOST_USER_SET_OWNER = 3, + VHOST_USER_RESET_OWNER = 4, + VHOST_USER_SET_MEM_TABLE = 5, + VHOST_USER_SET_LOG_BASE = 6, + VHOST_USER_SET_LOG_FD = 7, + VHOST_USER_SET_VRING_NUM = 8, + VHOST_USER_SET_VRING_ADDR = 9, + VHOST_USER_SET_VRING_BASE = 10, + VHOST_USER_GET_VRING_BASE = 11, + VHOST_USER_SET_VRING_KICK = 12, + VHOST_USER_SET_VRING_CALL = 13, + VHOST_USER_SET_VRING_ERR = 14, + VHOST_USER_GET_PROTOCOL_FEATURES = 15, + VHOST_USER_SET_PROTOCOL_FEATURES = 16, + VHOST_USER_GET_QUEUE_NUM = 17, + VHOST_USER_SET_VRING_ENABLE = 18, + VHOST_USER_SEND_RARP = 19, + VHOST_USER_NET_SET_MTU = 20, + VHOST_USER_SET_SLAVE_REQ_FD = 21, + VHOST_USER_IOTLB_MSG = 22, + VHOST_USER_SET_VRING_ENDIAN = 23, + VHOST_USER_GET_CONFIG = 24, + VHOST_USER_SET_CONFIG = 25, + VHOST_USER_CREATE_CRYPTO_SESSION = 26, + VHOST_USER_CLOSE_CRYPTO_SESSION = 27, + VHOST_USER_POSTCOPY_ADVISE = 28, + VHOST_USER_POSTCOPY_LISTEN = 29, + VHOST_USER_POSTCOPY_END = 30, + VHOST_USER_GET_INFLIGHT_FD = 31, + VHOST_USER_SET_INFLIGHT_FD = 32, + VHOST_USER_GPU_SET_SOCKET = 33, + VHOST_USER_VRING_KICK = 35, + VHOST_USER_GET_MAX_MEM_SLOTS = 36, + VHOST_USER_ADD_MEM_REG = 37, + VHOST_USER_REM_MEM_REG = 38, + VHOST_USER_MAX +} VhostUserRequest; + +typedef enum VhostUserSlaveRequest { + VHOST_USER_SLAVE_NONE = 0, + VHOST_USER_SLAVE_IOTLB_MSG = 1, + VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2, + VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, + VHOST_USER_SLAVE_VRING_CALL = 4, + VHOST_USER_SLAVE_VRING_ERR = 5, + VHOST_USER_SLAVE_MAX +} VhostUserSlaveRequest; + +typedef struct VhostUserMemoryRegion { + uint64_t guest_phys_addr; + uint64_t memory_size; + uint64_t userspace_addr; + uint64_t mmap_offset; +} VhostUserMemoryRegion; + +typedef struct VhostUserMemory { + uint32_t nregions; + uint32_t padding; + VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS]; +} VhostUserMemory; + +typedef struct VhostUserMemRegMsg { + uint64_t padding; + VhostUserMemoryRegion region; +} VhostUserMemRegMsg; + +typedef struct VhostUserLog { + uint64_t mmap_size; + uint64_t mmap_offset; +} VhostUserLog; + +typedef struct VhostUserConfig { + uint32_t offset; + uint32_t size; + uint32_t flags; + uint8_t region[VHOST_USER_MAX_CONFIG_SIZE]; +} VhostUserConfig; + +static VhostUserConfig c __attribute__ ((unused)); +#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \ + + sizeof(c.size) \ + + sizeof(c.flags)) + +typedef struct VhostUserVringArea { + uint64_t u64; + uint64_t size; + uint64_t offset; +} VhostUserVringArea; + +typedef struct VhostUserInflight { + uint64_t mmap_size; + uint64_t mmap_offset; + uint16_t num_queues; + uint16_t queue_size; +} VhostUserInflight; + +#if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__)) +# define VU_PACKED __attribute__((gcc_struct, packed)) +#else +# define VU_PACKED __attribute__((packed)) +#endif + +typedef struct VhostUserMsg { + int request; + +#define VHOST_USER_VERSION_MASK (0x3) +#define VHOST_USER_REPLY_MASK (0x1 << 2) +#define VHOST_USER_NEED_REPLY_MASK (0x1 << 3) + uint32_t flags; + uint32_t size; /* the following payload size */ + + union { +#define VHOST_USER_VRING_IDX_MASK (0xff) +#define VHOST_USER_VRING_NOFD_MASK (0x1 << 8) + uint64_t u64; + struct vhost_vring_state state; + struct vhost_vring_addr addr; + VhostUserMemory memory; + VhostUserMemRegMsg memreg; + VhostUserLog log; + VhostUserConfig config; + VhostUserVringArea area; + VhostUserInflight inflight; + } payload; + + int fds[VHOST_MEMORY_BASELINE_NREGIONS]; + int fd_num; + uint8_t *data; +} VU_PACKED VhostUserMsg; + +typedef struct VuDevRegion { + /* Guest Physical address. */ + uint64_t gpa; + /* Memory region size. */ + uint64_t size; + /* QEMU virtual address (userspace). */ + uint64_t qva; + /* Starting offset in our mmaped space. */ + uint64_t mmap_offset; + /* Start address of mmaped space. */ + uint64_t mmap_addr; +} VuDevRegion; + +typedef struct VuDev VuDev; + +typedef uint64_t (*vu_get_features_cb) (VuDev *dev); +typedef void (*vu_set_features_cb) (VuDev *dev, uint64_t features); +typedef int (*vu_process_msg_cb) (VuDev *dev, VhostUserMsg *vmsg, + int *do_reply); +typedef bool (*vu_read_msg_cb) (VuDev *dev, int sock, VhostUserMsg *vmsg); +typedef void (*vu_queue_set_started_cb) (VuDev *dev, int qidx, bool started); +typedef bool (*vu_queue_is_processed_in_order_cb) (VuDev *dev, int qidx); +typedef int (*vu_get_config_cb) (VuDev *dev, uint8_t *config, uint32_t len); +typedef int (*vu_set_config_cb) (VuDev *dev, const uint8_t *data, + uint32_t offset, uint32_t size, + uint32_t flags); + +typedef struct VuDevIface { + /* called by VHOST_USER_GET_FEATURES to get the features bitmask */ + vu_get_features_cb get_features; + /* enable vhost implementation features */ + vu_set_features_cb set_features; + /* get the protocol feature bitmask from the underlying vhost + * implementation */ + vu_get_features_cb get_protocol_features; + /* enable protocol features in the underlying vhost implementation. */ + vu_set_features_cb set_protocol_features; + /* process_msg is called for each vhost-user message received */ + /* skip libvhost-user processing if return value != 0 */ + vu_process_msg_cb process_msg; + /* tells when queues can be processed */ + vu_queue_set_started_cb queue_set_started; + /* + * If the queue is processed in order, in which case it will be + * resumed to vring.used->idx. This can help to support resuming + * on unmanaged exit/crash. + */ + vu_queue_is_processed_in_order_cb queue_is_processed_in_order; + /* get the config space of the device */ + vu_get_config_cb get_config; + /* set the config space of the device */ + vu_set_config_cb set_config; +} VuDevIface; + +typedef void (*vu_queue_handler_cb) (VuDev *dev, int qidx); + +typedef struct VuRing { + unsigned int num; + struct vring_desc *desc; + struct vring_avail *avail; + struct vring_used *used; + uint64_t log_guest_addr; + uint32_t flags; +} VuRing; + +typedef struct VuDescStateSplit { + /* Indicate whether this descriptor is inflight or not. + * Only available for head-descriptor. */ + uint8_t inflight; + + /* Padding */ + uint8_t padding[5]; + + /* Maintain a list for the last batch of used descriptors. + * Only available when batching is used for submitting */ + uint16_t next; + + /* Used to preserve the order of fetching available descriptors. + * Only available for head-descriptor. */ + uint64_t counter; +} VuDescStateSplit; + +typedef struct VuVirtqInflight { + /* The feature flags of this region. Now it's initialized to 0. */ + uint64_t features; + + /* The version of this region. It's 1 currently. + * Zero value indicates a vm reset happened. */ + uint16_t version; + + /* The size of VuDescStateSplit array. It's equal to the virtqueue + * size. Slave could get it from queue size field of VhostUserInflight. */ + uint16_t desc_num; + + /* The head of list that track the last batch of used descriptors. */ + uint16_t last_batch_head; + + /* Storing the idx value of used ring */ + uint16_t used_idx; + + /* Used to track the state of each descriptor in descriptor table */ + VuDescStateSplit desc[]; +} VuVirtqInflight; + +typedef struct VuVirtqInflightDesc { + uint16_t index; + uint64_t counter; +} VuVirtqInflightDesc; + +typedef struct VuVirtq { + VuRing vring; + + VuVirtqInflight *inflight; + + VuVirtqInflightDesc *resubmit_list; + + uint16_t resubmit_num; + + uint64_t counter; + + /* Next head to pop */ + uint16_t last_avail_idx; + + /* Last avail_idx read from VQ. */ + uint16_t shadow_avail_idx; + + uint16_t used_idx; + + /* Last used index value we have signalled on */ + uint16_t signalled_used; + + /* Last used index value we have signalled on */ + bool signalled_used_valid; + + /* Notification enabled? */ + bool notification; + + int inuse; + + vu_queue_handler_cb handler; + + int call_fd; + int kick_fd; + int err_fd; + unsigned int enable; + bool started; + + /* Guest addresses of our ring */ + struct vhost_vring_addr vra; +} VuVirtq; + +enum VuWatchCondtion { + VU_WATCH_IN = POLLIN, + VU_WATCH_OUT = POLLOUT, + VU_WATCH_PRI = POLLPRI, + VU_WATCH_ERR = POLLERR, + VU_WATCH_HUP = POLLHUP, +}; + +typedef void (*vu_panic_cb) (VuDev *dev, const char *err); +typedef void (*vu_watch_cb) (VuDev *dev, int condition, void *data); +typedef void (*vu_set_watch_cb) (VuDev *dev, int fd, int condition, + vu_watch_cb cb, void *data); +typedef void (*vu_remove_watch_cb) (VuDev *dev, int fd); + +typedef struct VuDevInflightInfo { + int fd; + void *addr; + uint64_t size; +} VuDevInflightInfo; + +struct VuDev { + int sock; + uint32_t nregions; + VuDevRegion regions[VHOST_USER_MAX_RAM_SLOTS]; + VuVirtq *vq; + VuDevInflightInfo inflight_info; + int log_call_fd; + /* Must be held while using slave_fd */ + pthread_mutex_t slave_mutex; + int slave_fd; + uint64_t log_size; + uint8_t *log_table; + uint64_t features; + uint64_t protocol_features; + bool broken; + uint16_t max_queues; + + /* + * @read_msg: custom method to read vhost-user message + * + * Read data from vhost_user socket fd and fill up + * the passed VhostUserMsg *vmsg struct. + * + * If reading fails, it should close the received set of file + * descriptors as socket message's auxiliary data. + * + * For the details, please refer to vu_message_read in libvhost-user.c + * which will be used by default if not custom method is provided when + * calling vu_init + * + * Returns: true if vhost-user message successfully received, + * otherwise return false. + * + */ + vu_read_msg_cb read_msg; + + /* + * @set_watch: add or update the given fd to the watch set, + * call cb when condition is met. + */ + vu_set_watch_cb set_watch; + + /* @remove_watch: remove the given fd from the watch set */ + vu_remove_watch_cb remove_watch; + + /* + * @panic: encountered an unrecoverable error, you may try to re-initialize + */ + vu_panic_cb panic; + const VuDevIface *iface; + + /* Postcopy data */ + int postcopy_ufd; + bool postcopy_listening; +}; + +typedef struct VuVirtqElement { + unsigned int index; + unsigned int out_num; + unsigned int in_num; + struct iovec *in_sg; + struct iovec *out_sg; +} VuVirtqElement; + +/** + * vu_init: + * @dev: a VuDev context + * @max_queues: maximum number of virtqueues + * @socket: the socket connected to vhost-user master + * @panic: a panic callback + * @set_watch: a set_watch callback + * @remove_watch: a remove_watch callback + * @iface: a VuDevIface structure with vhost-user device callbacks + * + * Initializes a VuDev vhost-user context. + * + * Returns: true on success, false on failure. + **/ +bool vu_init(VuDev *dev, + uint16_t max_queues, + int socket, + vu_panic_cb panic, + vu_read_msg_cb read_msg, + vu_set_watch_cb set_watch, + vu_remove_watch_cb remove_watch, + const VuDevIface *iface); + + +/** + * vu_deinit: + * @dev: a VuDev context + * + * Cleans up the VuDev context + */ +void vu_deinit(VuDev *dev); + +/** + * vu_dispatch: + * @dev: a VuDev context + * + * Process one vhost-user message. + * + * Returns: TRUE on success, FALSE on failure. + */ +bool vu_dispatch(VuDev *dev); + +/** + * vu_gpa_to_va: + * @dev: a VuDev context + * @plen: guest memory size + * @guest_addr: guest address + * + * Translate a guest address to a pointer. Returns NULL on failure. + */ +void *vu_gpa_to_va(VuDev *dev, uint64_t *plen, uint64_t guest_addr); + +/** + * vu_get_queue: + * @dev: a VuDev context + * @qidx: queue index + * + * Returns the queue number @qidx. + */ +VuVirtq *vu_get_queue(VuDev *dev, int qidx); + +/** + * vu_set_queue_handler: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * @handler: the queue handler callback + * + * Set the queue handler. This function may be called several times + * for the same queue. If called with NULL @handler, the handler is + * removed. + */ +void vu_set_queue_handler(VuDev *dev, VuVirtq *vq, + vu_queue_handler_cb handler); + +/** + * vu_set_queue_host_notifier: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * @fd: a file descriptor + * @size: host page size + * @offset: notifier offset in @fd file + * + * Set queue's host notifier. This function may be called several + * times for the same queue. If called with -1 @fd, the notifier + * is removed. + */ +bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd, + int size, int offset); + +/** + * vu_queue_set_notification: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * @enable: state + * + * Set whether the queue notifies (via event index or interrupt) + */ +void vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable); + +/** + * vu_queue_enabled: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * + * Returns: whether the queue is enabled. + */ +bool vu_queue_enabled(VuDev *dev, VuVirtq *vq); + +/** + * vu_queue_started: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * + * Returns: whether the queue is started. + */ +bool vu_queue_started(const VuDev *dev, const VuVirtq *vq); + +/** + * vu_queue_empty: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * + * Returns: true if the queue is empty or not ready. + */ +bool vu_queue_empty(VuDev *dev, VuVirtq *vq); + +/** + * vu_queue_notify: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * + * Request to notify the queue via callfd (skipped if unnecessary) + */ +void vu_queue_notify(VuDev *dev, VuVirtq *vq); + +/** + * vu_queue_notify_sync: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * + * Request to notify the queue via callfd (skipped if unnecessary) + * or sync message if possible. + */ +void vu_queue_notify_sync(VuDev *dev, VuVirtq *vq); + +/** + * vu_queue_pop: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * @sz: the size of struct to return (must be >= VuVirtqElement) + * + * Returns: a VuVirtqElement filled from the queue or NULL. The + * returned element must be free()-d by the caller. + */ +void *vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz); + + +/** + * vu_queue_unpop: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * @elem: The #VuVirtqElement + * @len: number of bytes written + * + * Pretend the most recent element wasn't popped from the virtqueue. The next + * call to vu_queue_pop() will refetch the element. + */ +void vu_queue_unpop(VuDev *dev, VuVirtq *vq, VuVirtqElement *elem, + size_t len); + +/** + * vu_queue_rewind: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * @num: number of elements to push back + * + * Pretend that elements weren't popped from the virtqueue. The next + * virtqueue_pop() will refetch the oldest element. + * + * Returns: true on success, false if @num is greater than the number of in use + * elements. + */ +bool vu_queue_rewind(VuDev *dev, VuVirtq *vq, unsigned int num); + +/** + * vu_queue_fill: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * @elem: a VuVirtqElement + * @len: length in bytes to write + * @idx: optional offset for the used ring index (0 in general) + * + * Fill the used ring with @elem element. + */ +void vu_queue_fill(VuDev *dev, VuVirtq *vq, + const VuVirtqElement *elem, + unsigned int len, unsigned int idx); + +/** + * vu_queue_push: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * @elem: a VuVirtqElement + * @len: length in bytes to write + * + * Helper that combines vu_queue_fill() with a vu_queue_flush(). + */ +void vu_queue_push(VuDev *dev, VuVirtq *vq, + const VuVirtqElement *elem, unsigned int len); + +/** + * vu_queue_flush: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * @num: number of elements to flush + * + * Mark the last number of elements as done (used.idx is updated by + * num elements). +*/ +void vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int num); + +/** + * vu_queue_get_avail_bytes: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * @in_bytes: in bytes + * @out_bytes: out bytes + * @max_in_bytes: stop counting after max_in_bytes + * @max_out_bytes: stop counting after max_out_bytes + * + * Count the number of available bytes, up to max_in_bytes/max_out_bytes. + */ +void vu_queue_get_avail_bytes(VuDev *vdev, VuVirtq *vq, unsigned int *in_bytes, + unsigned int *out_bytes, + unsigned max_in_bytes, unsigned max_out_bytes); + +/** + * vu_queue_avail_bytes: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * @in_bytes: expected in bytes + * @out_bytes: expected out bytes + * + * Returns: true if in_bytes <= in_total && out_bytes <= out_total + */ +bool vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes, + unsigned int out_bytes); + +#endif /* LIBVHOST_USER_H */ |