# # Copyright (c) 2021 Nutanix Inc. All rights reserved. # # Authors: John Levon # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of Nutanix nor the names of its contributors may be # used to endorse or promote products derived from this software without # specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH # DAMAGE. # # # Note that we don't use enum here, as class.value is a little verbose # from types import SimpleNamespace import collections.abc import ctypes as c import array import errno import json import mmap import os import socket import struct import syslog import copy import sys from resource import getpagesize from math import log2 PAGE_SIZE = getpagesize() PAGE_SHIFT = int(log2(PAGE_SIZE)) UINT32_MAX = 0xffffffff UINT64_MAX = 18446744073709551615 # from linux/pci_regs.h and linux/pci_defs.h PCI_HEADER_TYPE_NORMAL = 0 PCI_STD_HEADER_SIZEOF = 64 PCI_BARS_NR = 6 PCI_PM_SIZEOF = 8 PCI_CFG_SPACE_SIZE = 256 PCI_CFG_SPACE_EXP_SIZE = 4096 PCI_CAP_LIST_NEXT = 1 PCI_CAP_ID_PM = 0x1 PCI_CAP_ID_VNDR = 0x9 PCI_CAP_ID_MSI = 0x5 PCI_CAP_ID_MSIX = 0x11 PCI_CAP_ID_EXP = 0x10 PCI_EXP_DEVCTL2 = 40 PCI_EXP_LNKCTL2 = 48 PCI_EXT_CAP_ID_DSN = 0x03 PCI_EXT_CAP_ID_VNDR = 0x0b PCI_EXT_CAP_DSN_SIZEOF = 12 PCI_EXT_CAP_VNDR_HDR_SIZEOF = 8 # MSI registers PCI_MSI_FLAGS = 2 # Message Control offset PCI_MSI_ADDRESS_LO = 4 # Message Address offset PCI_MSI_FLAGS_ENABLE = 0x0001 # MSI enable PCI_CAP_MSI_SIZEOF = 24 # size of MSI registers # MSI-X registers PCI_MSIX_FLAGS = 2 # Message Control PCI_MSIX_TABLE = 4 # Table offset PCI_MSIX_FLAGS_MASKALL = 0x4000 # Mask all vectors for this function PCI_MSIX_FLAGS_ENABLE = 0x8000 # MSI-X enable PCI_CAP_MSIX_SIZEOF = 12 # size of MSIX registers # from linux/vfio.h VFIO_DEVICE_FLAGS_RESET = (1 << 0) VFIO_DEVICE_FLAGS_PCI = (1 << 1) VFIO_REGION_INFO_FLAG_READ = (1 << 0) VFIO_REGION_INFO_FLAG_WRITE = (1 << 1) VFIO_REGION_INFO_FLAG_MMAP = (1 << 2) VFIO_REGION_INFO_FLAG_CAPS = (1 << 3) VFIO_REGION_TYPE_MIGRATION = 3 VFIO_REGION_SUBTYPE_MIGRATION = 1 VFIO_REGION_INFO_CAP_SPARSE_MMAP = 1 VFIO_REGION_INFO_CAP_TYPE = 2 VFIO_IRQ_INFO_EVENTFD = (1 << 0) VFIO_IRQ_SET_DATA_NONE = (1 << 0) VFIO_IRQ_SET_DATA_BOOL = (1 << 1) VFIO_IRQ_SET_DATA_EVENTFD = (1 << 2) VFIO_IRQ_SET_ACTION_MASK = (1 << 3) VFIO_IRQ_SET_ACTION_UNMASK = (1 << 4) VFIO_IRQ_SET_ACTION_TRIGGER = (1 << 5) VFIO_DMA_UNMAP_FLAG_ALL = (1 << 1) # libvfio-user defines VFU_TRANS_SOCK = 0 VFU_TRANS_PIPE = 1 VFU_TRANS_MAX = 2 LIBVFIO_USER_FLAG_ATTACH_NB = (1 << 0) VFU_DEV_TYPE_PCI = 0 LIBVFIO_USER_MAJOR = 0 LIBVFIO_USER_MINOR = 1 VFIO_USER_CLIENT_MAX_FDS_LIMIT = 1024 SERVER_MAX_FDS = 8 ONE_TB = (1024 * 1024 * 1024 * 1024) VFIO_USER_DEFAULT_MAX_DATA_XFER_SIZE = (1024 * 1024) SERVER_MAX_DATA_XFER_SIZE = VFIO_USER_DEFAULT_MAX_DATA_XFER_SIZE SERVER_MAX_MSG_SIZE = SERVER_MAX_DATA_XFER_SIZE + 16 + 16 def is_32bit(): return (1 << 31) - 1 == sys.maxsize MAX_DMA_REGIONS = 16 # FIXME get from libvfio-user.h MAX_DMA_SIZE = sys.maxsize << 1 if is_32bit() else (8 * ONE_TB) # enum vfio_user_command VFIO_USER_VERSION = 1 VFIO_USER_DMA_MAP = 2 VFIO_USER_DMA_UNMAP = 3 VFIO_USER_DEVICE_GET_INFO = 4 VFIO_USER_DEVICE_GET_REGION_INFO = 5 VFIO_USER_DEVICE_GET_REGION_IO_FDS = 6 VFIO_USER_DEVICE_GET_IRQ_INFO = 7 VFIO_USER_DEVICE_SET_IRQS = 8 VFIO_USER_REGION_READ = 9 VFIO_USER_REGION_WRITE = 10 VFIO_USER_DMA_READ = 11 VFIO_USER_DMA_WRITE = 12 VFIO_USER_DEVICE_RESET = 13 VFIO_USER_REGION_WRITE_MULTI = 15 VFIO_USER_DEVICE_FEATURE = 16 VFIO_USER_MIG_DATA_READ = 17 VFIO_USER_MIG_DATA_WRITE = 18 VFIO_USER_MAX = 19 VFIO_USER_F_TYPE = 0xf VFIO_USER_F_TYPE_COMMAND = 0 VFIO_USER_F_TYPE_REPLY = 1 VFIO_USER_F_NO_REPLY = 0x10 VFIO_USER_F_ERROR = 0x20 SIZEOF_VFIO_USER_HEADER = 16 VFU_PCI_DEV_BAR0_REGION_IDX = 0 VFU_PCI_DEV_BAR1_REGION_IDX = 1 VFU_PCI_DEV_BAR2_REGION_IDX = 2 VFU_PCI_DEV_BAR3_REGION_IDX = 3 VFU_PCI_DEV_BAR4_REGION_IDX = 4 VFU_PCI_DEV_BAR5_REGION_IDX = 5 VFU_PCI_DEV_ROM_REGION_IDX = 6 VFU_PCI_DEV_CFG_REGION_IDX = 7 VFU_PCI_DEV_VGA_REGION_IDX = 8 VFU_PCI_DEV_NUM_REGIONS = 9 VFU_REGION_FLAG_READ = 1 VFU_REGION_FLAG_WRITE = 2 VFU_REGION_FLAG_RW = (VFU_REGION_FLAG_READ | VFU_REGION_FLAG_WRITE) VFU_REGION_FLAG_MEM = 4 VFU_REGION_FLAG_ALWAYS_CB = 8 VFIO_USER_F_DMA_REGION_READ = (1 << 0) VFIO_USER_F_DMA_REGION_WRITE = (1 << 1) VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP = (1 << 0) # enum vfio_user_device_mig_state VFIO_USER_DEVICE_STATE_ERROR = 0 VFIO_USER_DEVICE_STATE_STOP = 1 VFIO_USER_DEVICE_STATE_RUNNING = 2 VFIO_USER_DEVICE_STATE_STOP_COPY = 3 VFIO_USER_DEVICE_STATE_RESUMING = 4 VFIO_USER_DEVICE_STATE_RUNNING_P2P = 5 VFIO_USER_DEVICE_STATE_PRE_COPY = 6 VFIO_USER_DEVICE_STATE_PRE_COPY_P2P = 7 VFIO_DEVICE_FEATURE_MASK = 0xffff VFIO_DEVICE_FEATURE_GET = (1 << 16) VFIO_DEVICE_FEATURE_SET = (1 << 17) VFIO_DEVICE_FEATURE_PROBE = (1 << 18) VFIO_DEVICE_FEATURE_MIGRATION = 1 VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE = 2 VFIO_DEVICE_FEATURE_DMA_LOGGING_START = 6 VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP = 7 VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT = 8 VFIO_MIGRATION_STOP_COPY = (1 << 0) VFIO_MIGRATION_P2P = (1 << 1) VFIO_MIGRATION_PRE_COPY = (1 << 2) VFIO_USER_IO_FD_TYPE_IOEVENTFD = 0 VFIO_USER_IO_FD_TYPE_IOREGIONFD = 1 VFIO_USER_IO_FD_TYPE_IOEVENTFD_SHADOW = 2 # enum vfu_migr_state_t VFU_MIGR_STATE_STOP = 0 VFU_MIGR_STATE_RUNNING = 1 VFU_MIGR_STATE_STOP_AND_COPY = 2 VFU_MIGR_STATE_PRE_COPY = 3 VFU_MIGR_STATE_RESUME = 4 # enum vfu_dev_irq_type VFU_DEV_INTX_IRQ = 0 VFU_DEV_MSI_IRQ = 1 VFU_DEV_MSIX_IRQ = 2 VFU_DEV_ERR_IRQ = 3 VFU_DEV_REQ_IRQ = 4 VFU_DEV_NUM_IRQS = 5 # enum vfu_reset_type VFU_RESET_DEVICE = 0 VFU_RESET_LOST_CONN = 1 VFU_RESET_PCI_FLR = 2 # vfu_pci_type_t VFU_PCI_TYPE_CONVENTIONAL = 0 VFU_PCI_TYPE_PCI_X_1 = 1 VFU_PCI_TYPE_PCI_X_2 = 2 VFU_PCI_TYPE_EXPRESS = 3 VFU_CAP_FLAG_EXTENDED = (1 << 0) VFU_CAP_FLAG_CALLBACK = (1 << 1) VFU_CAP_FLAG_READONLY = (1 << 2) VFU_MIGR_CALLBACKS_VERS = 2 SOCK_PATH = b"/tmp/vfio-user.sock.%d" % os.getpid() topdir = os.path.realpath(os.path.dirname(__file__) + "/../..") libname = os.path.join(os.getenv("LIBVFIO_SO_DIR"), "libvfio-user.so") lib = c.CDLL(libname, use_errno=True) libc = c.CDLL("libc.so.6", use_errno=True) # # Structures # class Structure(c.Structure): def __len__(self): """Handy method to return length in bytes.""" return len(bytes(self)) @classmethod def pop_from_buffer(cls, buf): """"Pop a new object from the given bytes buffer.""" obj = cls.from_buffer_copy(buf) return obj, buf[c.sizeof(obj):] class vfu_bar_t(c.Union): _pack_ = 1 _fields_ = [ ("mem", c.c_int32), ("io", c.c_int32) ] class vfu_pci_hdr_intr_t(Structure): _pack_ = 1 _fields_ = [ ("iline", c.c_byte), ("ipin", c.c_byte) ] class vfu_pci_hdr_t(Structure): _pack_ = 1 _fields_ = [ ("id", c.c_int32), ("cmd", c.c_uint16), ("sts", c.c_uint16), ("rid", c.c_byte), ("cc_pi", c.c_byte), ("cc_scc", c.c_byte), ("cc_bcc", c.c_byte), ("cls", c.c_byte), ("mlt", c.c_byte), ("htype", c.c_byte), ("bist", c.c_byte), ("bars", vfu_bar_t * PCI_BARS_NR), ("ccptr", c.c_int32), ("ss", c.c_int32), ("erom", c.c_int32), ("cap", c.c_byte), ("res1", c.c_byte * 7), ("intr", vfu_pci_hdr_intr_t), ("mgnt", c.c_byte), ("mlat", c.c_byte) ] class iovec_t(Structure): _fields_ = [ ("iov_base", c.c_void_p), ("iov_len", c.c_int32) ] def __eq__(self, other): if type(self) is not type(other): return False return self.iov_base == other.iov_base \ and self.iov_len == other.iov_len def __str__(self): return "%s-%s" % \ (hex(self.iov_base or 0), hex((self.iov_base or 0) + self.iov_len)) def __copy__(self): cls = self.__class__ result = cls.__new__(cls) result.iov_base = self.iov_base result.iov_len = self.iov_len return result class vfio_irq_info(Structure): _pack_ = 1 _fields_ = [ ("argsz", c.c_uint32), ("flags", c.c_uint32), ("index", c.c_uint32), ("count", c.c_uint32), ] class vfio_irq_set(Structure): _pack_ = 1 _fields_ = [ ("argsz", c.c_uint32), ("flags", c.c_uint32), ("index", c.c_uint32), ("start", c.c_uint32), ("count", c.c_uint32), ] class vfio_user_device_info(Structure): _pack_ = 1 _fields_ = [ ("argsz", c.c_uint32), ("flags", c.c_uint32), ("num_regions", c.c_uint32), ("num_irqs", c.c_uint32), ] class vfio_region_info(Structure): _pack_ = 1 _fields_ = [ ("argsz", c.c_uint32), ("flags", c.c_uint32), ("index", c.c_uint32), ("cap_offset", c.c_uint32), ("size", c.c_uint64), ("offset", c.c_uint64), ] class vfio_region_info_cap_type(Structure): _pack_ = 1 _fields_ = [ ("id", c.c_uint16), ("version", c.c_uint16), ("next", c.c_uint32), ("type", c.c_uint32), ("subtype", c.c_uint32), ] class vfio_region_info_cap_sparse_mmap(Structure): _pack_ = 1 _fields_ = [ ("id", c.c_uint16), ("version", c.c_uint16), ("next", c.c_uint32), ("nr_areas", c.c_uint32), ("reserved", c.c_uint32), ] class vfio_region_sparse_mmap_area(Structure): _pack_ = 1 _fields_ = [ ("offset", c.c_uint64), ("size", c.c_uint64), ] class vfio_user_region_io_fds_request(Structure): _pack_ = 1 _fields_ = [ ("argsz", c.c_uint32), ("flags", c.c_uint32), ("index", c.c_uint32), ("count", c.c_uint32) ] class vfio_user_sub_region_ioeventfd(Structure): _pack_ = 1 _fields_ = [ ("gpa_offset", c.c_uint64), ("size", c.c_uint64), ("fd_index", c.c_uint32), ("type", c.c_uint32), ("flags", c.c_uint32), ("shadow_mem_fd_index", c.c_uint32), ("shadow_offset", c.c_uint64), ("datamatch", c.c_uint64), ] class vfio_user_sub_region_ioregionfd(Structure): _pack_ = 1 _fields_ = [ ("offset", c.c_uint64), ("size", c.c_uint64), ("fd_index", c.c_uint32), ("type", c.c_uint32), ("flags", c.c_uint32), ("padding", c.c_uint32), ("user_data", c.c_uint64) ] class vfio_user_sub_region_io_fd(c.Union): _pack_ = 1 _fields_ = [ ("sub_region_ioeventfd", vfio_user_sub_region_ioeventfd), ("sub_region_ioregionfd", vfio_user_sub_region_ioregionfd) ] class vfio_user_region_io_fds_reply(Structure): _pack_ = 1 _fields_ = [ ("argsz", c.c_uint32), ("flags", c.c_uint32), ("index", c.c_uint32), ("count", c.c_uint32) ] class vfio_user_dma_map(Structure): _pack_ = 1 _fields_ = [ ("argsz", c.c_uint32), ("flags", c.c_uint32), ("offset", c.c_uint64), ("addr", c.c_uint64), ("size", c.c_uint64), ] class vfio_user_dma_unmap(Structure): _pack_ = 1 _fields_ = [ ("argsz", c.c_uint32), ("flags", c.c_uint32), ("addr", c.c_uint64), ("size", c.c_uint64), ] class vfio_user_dma_region_access(Structure): """Payload for VFIO_USER_DMA_READ and VFIO_USER_DMA_WRITE.""" _pack_ = 1 _fields_ = [ ("addr", c.c_uint64), ("count", c.c_uint64), ] class vfu_dma_info_t(Structure): _fields_ = [ ("iova", iovec_t), ("vaddr", c.c_void_p), ("mapping", iovec_t), ("page_size", c.c_size_t), ("prot", c.c_uint32) ] def __eq__(self, other): if type(self) is not type(other): return False return self.iova == other.iova \ and self.vaddr == other.vaddr \ and self.mapping == other.mapping \ and self.page_size == other.page_size \ and self.prot == other.prot def __str__(self): return "IOVA=%s vaddr=%s mapping=%s page_size=%s prot=%s" % \ (self.iova, self.vaddr, self.mapping, hex(self.page_size), bin(self.prot)) def __copy__(self): cls = self.__class__ result = cls.__new__(cls) result.iova = self.iova result.vaddr = self.vaddr result.mapping = self.mapping result.page_size = self.page_size result.prot = self.prot return result class vfio_user_bitmap(Structure): _pack_ = 1 _fields_ = [ ("pgsize", c.c_uint64), ("size", c.c_uint64) ] class vfio_user_bitmap_range(Structure): _pack_ = 1 _fields_ = [ ("iova", c.c_uint64), ("size", c.c_uint64), ("bitmap", vfio_user_bitmap) ] transition_cb_t = c.CFUNCTYPE(c.c_int, c.c_void_p, c.c_int, use_errno=True) read_data_cb_t = c.CFUNCTYPE(c.c_ssize_t, c.c_void_p, c.c_void_p, c.c_uint64) write_data_cb_t = c.CFUNCTYPE(c.c_ssize_t, c.c_void_p, c.c_void_p, c.c_uint64) class vfu_migration_callbacks_t(Structure): _fields_ = [ ("version", c.c_int), ("transition", transition_cb_t), ("read_data", read_data_cb_t), ("write_data", write_data_cb_t), ] class vfio_user_device_feature(Structure): _pack_ = 1 _fields_ = [ ("argsz", c.c_uint32), ("flags", c.c_uint32) ] class vfio_user_device_feature_migration(Structure): _pack_ = 1 _fields_ = [ ("flags", c.c_uint64) ] class vfio_user_device_feature_mig_state(Structure): _pack_ = 1 _fields_ = [ ("device_state", c.c_uint32), ("data_fd", c.c_uint32), ] class vfio_user_device_feature_dma_logging_control(Structure): _pack_ = 1 _fields_ = [ ("page_size", c.c_uint64), ("num_ranges", c.c_uint32), ("reserved", c.c_uint32), ] class vfio_user_device_feature_dma_logging_range(Structure): _pack_ = 1 _fields_ = [ ("iova", c.c_uint64), ("length", c.c_uint64), ] class vfio_user_device_feature_dma_logging_report(Structure): _pack_ = 1 _fields_ = [ ("iova", c.c_uint64), ("length", c.c_uint64), ("page_size", c.c_uint64) ] class vfio_user_mig_data(Structure): _pack_ = 1 _fields_ = [ ("argsz", c.c_uint32), ("size", c.c_uint32) ] class dma_sg_t(Structure): _fields_ = [ ("dma_addr", c.c_void_p), ("region", c.c_int), ("length", c.c_uint64), ("offset", c.c_uint64), ("writeable", c.c_bool), ] def __str__(self): return "DMA addr=%s, region index=%s, length=%s, offset=%s, RW=%s" % \ (hex(self.dma_addr), self.region, hex(self.length), hex(self.offset), self.writeable) # # Util functions # lib.vfu_create_ctx.argtypes = (c.c_int, c.c_char_p, c.c_int, c.c_void_p, c.c_int) lib.vfu_create_ctx.restype = (c.c_void_p) lib.vfu_setup_log.argtypes = (c.c_void_p, c.c_void_p, c.c_int) lib.vfu_realize_ctx.argtypes = (c.c_void_p,) lib.vfu_attach_ctx.argtypes = (c.c_void_p,) lib.vfu_run_ctx.argtypes = (c.c_void_p,) lib.vfu_destroy_ctx.argtypes = (c.c_void_p,) vfu_region_access_cb_t = c.CFUNCTYPE(c.c_int, c.c_void_p, c.POINTER(c.c_char), c.c_ulong, c.c_long, c.c_bool) lib.vfu_setup_region.argtypes = (c.c_void_p, c.c_int, c.c_ulong, vfu_region_access_cb_t, c.c_int, c.c_void_p, c.c_uint32, c.c_int, c.c_uint64) vfu_reset_cb_t = c.CFUNCTYPE(c.c_int, c.c_void_p, c.c_int) lib.vfu_setup_device_reset_cb.argtypes = (c.c_void_p, vfu_reset_cb_t) lib.vfu_pci_get_config_space.argtypes = (c.c_void_p,) lib.vfu_pci_get_config_space.restype = (c.c_void_p) lib.vfu_setup_device_nr_irqs.argtypes = (c.c_void_p, c.c_int, c.c_uint32) lib.vfu_pci_init.argtypes = (c.c_void_p, c.c_int, c.c_int, c.c_int) lib.vfu_pci_add_capability.argtypes = (c.c_void_p, c.c_ulong, c.c_int, c.POINTER(c.c_byte)) lib.vfu_pci_find_capability.argtypes = (c.c_void_p, c.c_bool, c.c_int) lib.vfu_pci_find_capability.restype = (c.c_ulong) lib.vfu_pci_find_next_capability.argtypes = (c.c_void_p, c.c_bool, c.c_ulong, c.c_int) lib.vfu_pci_find_next_capability.restype = (c.c_ulong) lib.vfu_irq_trigger.argtypes = (c.c_void_p, c.c_uint) vfu_device_quiesce_cb_t = c.CFUNCTYPE(c.c_int, c.c_void_p, use_errno=True) lib.vfu_setup_device_quiesce_cb.argtypes = (c.c_void_p, vfu_device_quiesce_cb_t) vfu_dma_register_cb_t = c.CFUNCTYPE(None, c.c_void_p, c.POINTER(vfu_dma_info_t), use_errno=True) vfu_dma_unregister_cb_t = c.CFUNCTYPE(None, c.c_void_p, c.POINTER(vfu_dma_info_t), use_errno=True) lib.vfu_setup_device_dma.argtypes = (c.c_void_p, vfu_dma_register_cb_t, vfu_dma_unregister_cb_t) lib.vfu_setup_device_migration_callbacks.argtypes = (c.c_void_p, c.POINTER(vfu_migration_callbacks_t)) lib.dma_sg_size.restype = (c.c_size_t) lib.vfu_addr_to_sgl.argtypes = (c.c_void_p, c.c_void_p, c.c_size_t, c.POINTER(dma_sg_t), c.c_size_t, c.c_int) lib.vfu_sgl_get.argtypes = (c.c_void_p, c.POINTER(dma_sg_t), c.POINTER(iovec_t), c.c_size_t, c.c_int) lib.vfu_sgl_put.argtypes = (c.c_void_p, c.POINTER(dma_sg_t), c.POINTER(iovec_t), c.c_size_t) lib.vfu_sgl_read.argtypes = (c.c_void_p, c.POINTER(dma_sg_t), c.c_size_t, c.c_void_p) lib.vfu_sgl_write.argtypes = (c.c_void_p, c.POINTER(dma_sg_t), c.c_size_t, c.c_void_p) lib.vfu_create_ioeventfd.argtypes = (c.c_void_p, c.c_uint32, c.c_int, c.c_size_t, c.c_uint32, c.c_uint32, c.c_uint64, c.c_int32, c.c_uint64) lib.vfu_device_quiesced.argtypes = (c.c_void_p, c.c_int) vfu_dev_irq_state_cb_t = c.CFUNCTYPE(None, c.c_void_p, c.c_uint32, c.c_uint32, c.c_bool, use_errno=True) lib.vfu_setup_irq_state_callback.argtypes = (c.c_void_p, c.c_int, vfu_dev_irq_state_cb_t) def to_byte(val): """Cast an int to a byte value.""" return val.to_bytes(1, 'little') def to_bytes_le(n, length=1): return n.to_bytes(length, 'little') def skip(fmt, buf): """Return the data remaining after skipping the given elements.""" return buf[struct.calcsize(fmt):] def parse_json(json_str): """Parse JSON into an object with attributes (instead of using a dict).""" return json.loads(json_str, object_hook=lambda d: SimpleNamespace(**d)) IOEVENT_SIZE = 8 def eventfd(initval=0, flags=0): libc.eventfd.argtypes = (c.c_uint, c.c_int) return libc.eventfd(initval, flags) def connect_sock(): sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) sock.connect(SOCK_PATH) return sock class Client: """Models a VFIO-user client connected to the server under test.""" def __init__(self, sock=None): self.sock = sock self.client_cmd_socket = None def connect(self, ctx, capabilities={}): self.sock = connect_sock() client_caps = { "capabilities": { "max_data_xfer_size": VFIO_USER_DEFAULT_MAX_DATA_XFER_SIZE, "max_msg_fds": 8, }, } def update(target, overrides): for k, v in overrides.items(): if isinstance(v, collections.abc.Mapping): target[k] = target.get(k, {}) update(target[k], v) else: target[k] = v update(client_caps, capabilities) caps_json = json.dumps(client_caps) # struct vfio_user_version payload = struct.pack("HH%dsc" % len(caps_json), LIBVFIO_USER_MAJOR, LIBVFIO_USER_MINOR, caps_json.encode(), b'\0') hdr = vfio_user_header(VFIO_USER_VERSION, size=len(payload)) self.sock.send(hdr + payload) vfu_attach_ctx(ctx, expect=0) fds, payload = get_reply_fds(self.sock, expect=0) server_caps = json.loads(payload[struct.calcsize("HH"):-1].decode()) try: if (client_caps["capabilities"]["twin_socket"]["supported"] and server_caps["capabilities"]["twin_socket"]["supported"]): index = server_caps["capabilities"]["twin_socket"]["fd_index"] self.client_cmd_socket = socket.socket(fileno=fds[index]) except KeyError: pass return self.sock def disconnect(self, ctx): self.sock.close() self.sock = None if self.client_cmd_socket is not None: self.client_cmd_socket.close() self.client_cmd_socket = None # notice client closed connection vfu_run_ctx(ctx, errno.ENOTCONN) def connect_client(*args, **kwargs): client = Client() client.connect(*args, **kwargs) return client def get_reply(sock, expect=0): buf = sock.recv(4096) (msg_id, cmd, msg_size, flags, errno) = struct.unpack("HHIII", buf[0:16]) assert (flags & VFIO_USER_F_TYPE_REPLY) != 0 assert errno == expect return buf[16:] def send_msg(sock, cmd, msg_type, payload=bytearray(), fds=None, msg_id=None, error_no=0): """ Sends a message on the given socket. Can be used on either end of the socket to send commands and replies. """ hdr = vfio_user_header(cmd, size=len(payload), msg_type=msg_type, msg_id=msg_id, error=error_no != 0, error_no=error_no) if fds: sock.sendmsg([hdr + payload], [(socket.SOL_SOCKET, socket.SCM_RIGHTS, struct.pack("I" * len(fds), *fds))]) else: sock.send(hdr + payload) def msg(ctx, sock, cmd, payload=bytearray(), expect=0, fds=None, rsp=True, busy=False): """ Round trip a request and reply to the server. vfu_run_ctx will be called once for the server to process the incoming message, If a response is not expected then @rsp must be set to False, otherwise this function will block indefinitely. If busy is True, then we expect the server to have returned EBUSY from a quiesce callback, and hence vfu_run_ctx(); in this case, there will be no response: it can later be retrieved, post vfu_device_quiesced(), with get_reply(). """ send_msg(sock, cmd, VFIO_USER_F_TYPE_COMMAND, payload, fds) if busy: vfu_run_ctx(ctx, errno.EBUSY) rsp = False else: vfu_run_ctx(ctx) if not rsp: return return get_reply(sock, expect=expect) def get_msg_fds(sock, expect_msg_type, expect_errno=0): """ Receives a message from a socket and pulls the returned file descriptors out of the message. """ fds = array.array("i") data, ancillary, flags, addr = sock.recvmsg(SERVER_MAX_MSG_SIZE, socket.CMSG_LEN(64 * fds.itemsize)) (msg_id, cmd, msg_size, msg_flags, errno) = struct.unpack("HHIII", data[0:16]) assert errno == expect_errno cmsg_level, cmsg_type, packed_fd = ancillary[0] if len(ancillary) != 0 \ else (0, 0, []) unpacked_fds = [] for i in range(0, len(packed_fd), 4): [unpacked_fd] = struct.unpack_from("i", packed_fd, offset=i) unpacked_fds.append(unpacked_fd) assert len(packed_fd)/4 == len(unpacked_fds) assert (msg_flags & VFIO_USER_F_TYPE) == expect_msg_type return (unpacked_fds, msg_id, cmd, data[16:]) def get_reply_fds(sock, expect=0): """ Receives a reply from a socket and returns the included file descriptors and message payload data. """ (unpacked_fds, _, _, data) = get_msg_fds(sock, VFIO_USER_F_TYPE_REPLY, expect) return (unpacked_fds, data) def msg_fds(ctx, sock, cmd, payload, expect=0, fds=None): """Round trip a request and reply to the server. With the server returning new fds""" hdr = vfio_user_header(cmd, size=len(payload)) if fds: sock.sendmsg([hdr + payload], [(socket.SOL_SOCKET, socket.SCM_RIGHTS, struct.pack("I" * len(fds), *fds))]) else: sock.send(hdr + payload) vfu_run_ctx(ctx) return get_reply_fds(sock, expect=expect) def get_pci_header(ctx): ptr = lib.vfu_pci_get_config_space(ctx) return c.cast(ptr, c.POINTER(vfu_pci_hdr_t)).contents def get_pci_cfg_space(ctx): ptr = lib.vfu_pci_get_config_space(ctx) return c.cast(ptr, c.POINTER(c.c_char))[0:PCI_CFG_SPACE_SIZE] def get_pci_ext_cfg_space(ctx): ptr = lib.vfu_pci_get_config_space(ctx) return c.cast(ptr, c.POINTER(c.c_char))[0:PCI_CFG_SPACE_EXP_SIZE] def read_pci_cfg_space(ctx, buf, count, offset, extended=False): space = get_pci_ext_cfg_space(ctx) if extended else get_pci_cfg_space(ctx) for i in range(count): buf[i] = space[offset+i] return count def write_pci_cfg_space(ctx, buf, count, offset, extended=False): max_offset = PCI_CFG_SPACE_EXP_SIZE if extended else PCI_CFG_SPACE_SIZE assert offset + count <= max_offset space = c.cast(lib.vfu_pci_get_config_space(ctx), c.POINTER(c.c_char)) for i in range(count): # FIXME this assignment doesn't update the actual config space, it # works fine on x86_64 space[offset+i] = buf[i] return count def access_region(ctx, sock, is_write, region, offset, count, data=None, expect=0, rsp=True, busy=False): # struct vfio_user_region_access payload = struct.pack("QII", offset, region, count) if is_write: payload += data cmd = VFIO_USER_REGION_WRITE if is_write else VFIO_USER_REGION_READ result = msg(ctx, sock, cmd, payload, expect=expect, rsp=rsp, busy=busy) if is_write: return None if rsp: return skip("QII", result) def write_region(ctx, sock, region, offset, count, data, expect=0, rsp=True, busy=False): access_region(ctx, sock, True, region, offset, count, data, expect=expect, rsp=rsp, busy=busy) def read_region(ctx, sock, region, offset, count, expect=0, rsp=True, busy=False): return access_region(ctx, sock, False, region, offset, count, expect=expect, rsp=rsp, busy=busy) def ext_cap_hdr(buf, offset): """Read an extended cap header.""" # struct pcie_ext_cap_hdr cap_id, cap_next = struct.unpack_from('HH', buf, offset) cap_next >>= 4 return cap_id, cap_next def dma_register(ctx, info): pass @vfu_dma_register_cb_t def __dma_register(ctx, info): # The copy is required because in case of deliberate failure (e.g. # test_dma_map_busy_reply_fail) the memory gets deallocated and mock only # records the pointer, so the contents are all null/zero. dma_register(ctx, copy.copy(info.contents)) def dma_unregister(ctx, info): pass @vfu_dma_unregister_cb_t def __dma_unregister(ctx, info): dma_unregister(ctx, copy.copy(info.contents)) def setup_flrc(ctx): # flrc cap = struct.pack("ccHHcc52c", to_byte(PCI_CAP_ID_EXP), b'\0', 0, 0, b'\0', b'\x10', *[b'\0' for _ in range(52)]) # FIXME adding capability after we've realized the device only works # because of bug #618. pos = vfu_pci_add_capability(ctx, pos=0, flags=0, data=cap) assert pos == PCI_STD_HEADER_SIZEOF def quiesce_cb(ctx): return 0 @vfu_device_quiesce_cb_t def _quiesce_cb(ctx): return quiesce_cb(ctx) def vfu_setup_device_quiesce_cb(ctx, quiesce_cb=_quiesce_cb): assert ctx is not None lib.vfu_setup_device_quiesce_cb(ctx, c.cast(quiesce_cb, vfu_device_quiesce_cb_t)) def reset_cb(ctx, reset_type): return 0 @vfu_reset_cb_t def _reset_cb(ctx, reset_type): return reset_cb(ctx, reset_type) def vfu_setup_device_reset_cb(ctx, cb=_reset_cb): assert ctx is not None return lib.vfu_setup_device_reset_cb(ctx, c.cast(cb, vfu_reset_cb_t)) def prepare_ctx_for_dma(dma_register=__dma_register, dma_unregister=__dma_unregister, quiesce=_quiesce_cb, reset=_reset_cb, migration_callbacks=False): ctx = vfu_create_ctx(flags=LIBVFIO_USER_FLAG_ATTACH_NB) assert ctx is not None ret = vfu_pci_init(ctx) assert ret == 0 ret = vfu_setup_device_dma(ctx, dma_register, dma_unregister) assert ret == 0 if quiesce is not None: vfu_setup_device_quiesce_cb(ctx, quiesce) if reset is not None: ret = vfu_setup_device_reset_cb(ctx, reset) assert ret == 0 if migration_callbacks: ret = vfu_setup_device_migration_callbacks(ctx) assert ret == 0 ret = vfu_realize_ctx(ctx) assert ret == 0 return ctx def transition_to_state(ctx, sock, state, expect=0, rsp=True, busy=False): feature = vfio_user_device_feature( argsz=len(vfio_user_device_feature()) + len(vfio_user_device_feature_mig_state()), flags=VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE ) payload = vfio_user_device_feature_mig_state(device_state=state) msg(ctx, sock, VFIO_USER_DEVICE_FEATURE, bytes(feature) + bytes(payload), expect=expect, rsp=rsp, busy=busy) # # Library wrappers # next_msg_id = 1 @c.CFUNCTYPE(None, c.c_void_p, c.c_int, c.c_char_p) def log(ctx, level, msg): lvl2str = {syslog.LOG_EMERG: "EMERGENCY", syslog.LOG_ALERT: "ALERT", syslog.LOG_CRIT: "CRITICAL", syslog.LOG_ERR: "ERROR", syslog.LOG_WARNING: "WANRING", syslog.LOG_NOTICE: "NOTICE", syslog.LOG_INFO: "INFO", syslog.LOG_DEBUG: "DEBUG"} print(lvl2str[level] + ": " + msg.decode("utf-8")) def vfio_user_header(cmd, size, msg_type=VFIO_USER_F_TYPE_COMMAND, msg_id=None, no_reply=False, error=False, error_no=0): global next_msg_id if msg_id is None: msg_id = next_msg_id next_msg_id += 1 flags = msg_type if no_reply: flags |= VFIO_USER_F_NO_REPLY if error: flags |= VFIO_USER_F_ERROR buf = struct.pack("HHIII", msg_id, cmd, SIZEOF_VFIO_USER_HEADER + size, flags, error_no) return buf def vfu_create_ctx(trans=VFU_TRANS_SOCK, sock_path=SOCK_PATH, flags=0, private=None, dev_type=VFU_DEV_TYPE_PCI): if os.path.exists(sock_path): os.remove(sock_path) ctx = lib.vfu_create_ctx(trans, sock_path, flags, private, dev_type) if ctx: lib.vfu_setup_log(ctx, log, syslog.LOG_DEBUG) return ctx def vfu_realize_ctx(ctx): return lib.vfu_realize_ctx(ctx) def vfu_attach_ctx(ctx, expect=0): ret = lib.vfu_attach_ctx(ctx) if expect == 0: assert ret == 0, "failed to attach: %s" % os.strerror(c.get_errno()) else: assert ret == -1 assert c.get_errno() == expect return ret def vfu_run_ctx(ctx, expect=0): ret = lib.vfu_run_ctx(ctx) if expect == 0: assert ret >= 0, "vfu_run_ctx(): %s" % os.strerror(c.get_errno()) else: assert ret == -1 assert c.get_errno() == expect return ret def vfu_destroy_ctx(ctx): lib.vfu_destroy_ctx(ctx) ctx = None if os.path.exists(SOCK_PATH): os.remove(SOCK_PATH) def pci_region_cb(ctx, buf, count, offset, is_write): pass @vfu_region_access_cb_t def __pci_region_cb(ctx, buf, count, offset, is_write): return pci_region_cb(ctx, buf, count, offset, is_write) def vfu_setup_region(ctx, index, size, cb=__pci_region_cb, flags=0, mmap_areas=None, nr_mmap_areas=None, fd=-1, offset=0): assert ctx is not None c_mmap_areas = None if mmap_areas: c_mmap_areas = (iovec_t * len(mmap_areas))(*mmap_areas) if nr_mmap_areas is None: if mmap_areas: nr_mmap_areas = len(mmap_areas) else: nr_mmap_areas = 0 # We're sending a file descriptor to ourselves; to pretend the server is # separate, we need to dup() here. if fd != -1: fd = os.dup(fd) ret = lib.vfu_setup_region(ctx, index, size, c.cast(cb, vfu_region_access_cb_t), flags, c_mmap_areas, nr_mmap_areas, fd, offset) if fd != -1 and ret != 0: os.close(fd) return ret def vfu_setup_device_nr_irqs(ctx, irqtype, count): assert ctx is not None return lib.vfu_setup_device_nr_irqs(ctx, irqtype, count) def irq_state(ctx, start, count, mask): pass @vfu_dev_irq_state_cb_t def __irq_state(ctx, start, count, mask): irq_state(ctx, start, count, mask) def vfu_setup_irq_state_callback(ctx, irqtype, cb=__irq_state): assert ctx is not None return lib.vfu_setup_irq_state_callback(ctx, irqtype, cb) def vfu_pci_init(ctx, pci_type=VFU_PCI_TYPE_EXPRESS, hdr_type=PCI_HEADER_TYPE_NORMAL): assert ctx is not None return lib.vfu_pci_init(ctx, pci_type, hdr_type, 0) def vfu_pci_add_capability(ctx, pos, flags, data): assert ctx is not None databuf = (c.c_byte * len(data)).from_buffer(bytearray(data)) return lib.vfu_pci_add_capability(ctx, pos, flags, databuf) def vfu_pci_find_capability(ctx, extended, cap_id): assert ctx is not None return lib.vfu_pci_find_capability(ctx, extended, cap_id) def vfu_pci_find_next_capability(ctx, extended, offset, cap_id): assert ctx is not None return lib.vfu_pci_find_next_capability(ctx, extended, offset, cap_id) def vfu_irq_trigger(ctx, subindex): assert ctx is not None return lib.vfu_irq_trigger(ctx, subindex) def vfu_setup_device_dma(ctx, register_cb=None, unregister_cb=None): assert ctx is not None return lib.vfu_setup_device_dma(ctx, c.cast(register_cb, vfu_dma_register_cb_t), c.cast(unregister_cb, vfu_dma_unregister_cb_t)) # FIXME some of the migration arguments are probably wrong as in the C version # they're pointer. Check how we handle the read/write region callbacks. def migr_trans_cb(ctx, state): pass @transition_cb_t def __migr_trans_cb(ctx, state): return migr_trans_cb(ctx, state) def migr_read_data_cb(ctx, buf, count, offset): pass @read_data_cb_t def __migr_read_data_cb(ctx, buf, count, offset): return migr_read_data_cb(ctx, buf, count, offset) def migr_write_data_cb(ctx, buf, count, offset): pass @write_data_cb_t def __migr_write_data_cb(ctx, buf, count, offset): return migr_write_data_cb(ctx, buf, count, offset) def vfu_setup_device_migration_callbacks(ctx, cbs=None): assert ctx is not None if not cbs: cbs = vfu_migration_callbacks_t() cbs.version = VFU_MIGR_CALLBACKS_VERS cbs.transition = __migr_trans_cb cbs.read_data = __migr_read_data_cb cbs.write_data = __migr_write_data_cb return lib.vfu_setup_device_migration_callbacks(ctx, cbs) def dma_sg_size(): return lib.dma_sg_size() def vfu_addr_to_sgl(ctx, dma_addr, length, max_nr_sgs=1, prot=(mmap.PROT_READ | mmap.PROT_WRITE)): assert ctx is not None sg = (dma_sg_t * max_nr_sgs)() return (lib.vfu_addr_to_sgl(ctx, dma_addr, length, sg, max_nr_sgs, prot), sg) def vfu_sgl_get(ctx, sg, iovec, cnt=1, flags=0): return lib.vfu_sgl_get(ctx, sg, iovec, cnt, flags) def vfu_sgl_put(ctx, sg, iovec, cnt=1): return lib.vfu_sgl_put(ctx, sg, iovec, cnt) def vfu_sgl_read(ctx, sg, cnt=1): data = bytearray(sum([sge.length for sge in sg])) buf = (c.c_byte * len(data)).from_buffer(data) return lib.vfu_sgl_read(ctx, sg, cnt, buf), data def vfu_sgl_write(ctx, sg, cnt=1, data=bytearray()): assert len(data) == sum([sge.length for sge in sg]) buf = (c.c_byte * len(data)).from_buffer(data) return lib.vfu_sgl_write(ctx, sg, cnt, buf) def vfu_create_ioeventfd(ctx, region_idx, fd, gpa_offset, size, flags, datamatch, shadow_fd=-1, shadow_offset=0): assert ctx is not None return lib.vfu_create_ioeventfd(ctx, region_idx, fd, gpa_offset, size, flags, datamatch, shadow_fd, shadow_offset) def vfu_device_quiesced(ctx, err): return lib.vfu_device_quiesced(ctx, err) def fail_with_errno(err): def side_effect(args, *kwargs): c.set_errno(err) return -1 return side_effect def fds_are_same(fd1: int, fd2: int) -> bool: s1 = os.stat(fd1) s2 = os.stat(fd2) return s1.st_dev == s2.st_dev and s1.st_ino == s2.st_ino def get_bitmap_size(size: int, pgsize: int) -> int: """ Returns the size, in bytes, of the bitmap that represents the given range with the given page size. """ nr_pages = (size // pgsize) + (1 if size % pgsize != 0 else 0) return ((nr_pages + 63) & ~63) // 8 get_errno_loc = libc.__errno_location get_errno_loc.restype = c.POINTER(c.c_int) def set_real_errno(errno: int): """ ctypes's errno is an internal value that only updates the real value when the foreign function call returns. In callbacks, however, this doesn't happen, so `c.set_errno` doesn't propagate in time. In this case we need to manually set the real errno. """ c.set_errno(errno) # set internal errno so `c.get_errno` gives right value get_errno_loc()[0] = errno # set real errno # ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: #