diff options
author | Felipe Franciosi <felipe@nutanix.com> | 2019-07-02 14:06:42 +0100 |
---|---|---|
committer | Felipe Franciosi <felipe@nutanix.com> | 2019-09-05 16:45:35 +0100 |
commit | f8ef2771ca6c05dadd3188099eb678e6135e12e2 (patch) | |
tree | 1629283ee553622ce99477c63da4994d4c87bc0f /lib | |
download | libvfio-user-f8ef2771ca6c05dadd3188099eb678e6135e12e2.zip libvfio-user-f8ef2771ca6c05dadd3188099eb678e6135e12e2.tar.gz libvfio-user-f8ef2771ca6c05dadd3188099eb678e6135e12e2.tar.bz2 |
Initial commit
Diffstat (limited to 'lib')
-rw-r--r-- | lib/.indent.pro | 4 | ||||
-rw-r--r-- | lib/CMakeLists.txt | 46 | ||||
-rw-r--r-- | lib/common.h | 60 | ||||
-rw-r--r-- | lib/dma.c | 331 | ||||
-rw-r--r-- | lib/dma.h | 241 | ||||
-rw-r--r-- | lib/libmuser.c | 1063 | ||||
-rw-r--r-- | lib/libmuser_pci.c | 311 | ||||
-rw-r--r-- | lib/msicap.h | 67 | ||||
-rw-r--r-- | lib/muser.h | 185 | ||||
-rw-r--r-- | lib/pci.h | 276 | ||||
-rw-r--r-- | lib/pmcap.h | 70 | ||||
-rw-r--r-- | lib/pxcap.h | 144 |
12 files changed, 2798 insertions, 0 deletions
diff --git a/lib/.indent.pro b/lib/.indent.pro new file mode 100644 index 0000000..52ef8f2 --- /dev/null +++ b/lib/.indent.pro @@ -0,0 +1,4 @@ +-nbad -bap -nbc -bbo -hnl -br -brs -c33 -cd33 -ncdb -ce -ci4 +-cli0 -d0 -di1 -nfc1 -i4 -ip0 -l80 -lp -npcs -nprs -psl -sai +-saf -saw -ncs -nsc -nsob -nfca -cp33 -ss -ts8 -il0 +-nut -blf diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt new file mode 100644 index 0000000..6d3d0ae --- /dev/null +++ b/lib/CMakeLists.txt @@ -0,0 +1,46 @@ +# +# Copyright (c) 2019 Nutanix Inc. All rights reserved. +# +# Authors: Thanos Makatos <thanos@nutanix.com> +# Swapnil Ingle <swapnil.ingle@nutanix.com> +# Felipe Franciosi <felipe@nutanix.com> +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of Nutanix nor the names of its contributors may be +# used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY +# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +add_library(muser SHARED + ../kmod/muser.h + muser.h + pci.h + pmcap.h + msicap.h + pxcap.h + common.h + dma.h + dma.c + libmuser.c + libmuser_pci.c) +set_target_properties(muser PROPERTIES PUBLIC_HEADER "muser.h;pci.h;pmcap.h;msicap.h;pxcap.h") +install(TARGETS muser + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/muser) diff --git a/lib/common.h b/lib/common.h new file mode 100644 index 0000000..4fbc048 --- /dev/null +++ b/lib/common.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2019 Nutanix Inc. All rights reserved. + * + * Authors: Thanos Makatos <thanos@nutanix.com> + * Swapnil Ingle <swapnil.ingle@nutanix.com> + * Felipe Franciosi <felipe@nutanix.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Nutanix nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + */ + +#ifndef __COMMON_H__ +#define __COMMON_H__ + +#include <stdint.h> + +#define PAGE_SIZE sysconf(_SC_PAGE_SIZE) +#define PAGE_ALIGNED(x) (((x) & ((typeof(x))(PAGE_SIZE) - 1)) == 0) + +#define BIT(nr) (1UL << (nr)) + +#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0])) + +#define likely(e) __builtin_expect(!!(e), 1) +#define unlikely(e) __builtin_expect(e, 0) + +#define ROUND_DOWN(x, a) ((x) & ~((a)-1)) +#define ROUND_UP(x,a) ROUND_DOWN((x)+(a)-1, a) + +void lm_log(lm_ctx_t const *const lm_ctx, const lm_log_lvl_t lvl, + char const *const fmt, ...); + +void dump_buffer(lm_ctx_t const *const lm_ctx, char const *const prefix, + unsigned char const *const buf, uint32_t count); + + +#endif /* __COMMON_H__ */ + +/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lib/dma.c b/lib/dma.c new file mode 100644 index 0000000..5c9455f --- /dev/null +++ b/lib/dma.c @@ -0,0 +1,331 @@ +/* + * Copyright (c) 2019 Nutanix Inc. All rights reserved. 
+ * + * Authors: Mike Cui <cui@nutanix.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Nutanix nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <stdio.h> +#include <sys/param.h> + +#include <stddef.h> +#include <stdbool.h> +#include <string.h> +#include <stdlib.h> + +#include <errno.h> + +#include "dma.h" + +static inline ssize_t +fd_get_blocksize(int fd) +{ + struct stat st; + + if (fstat(fd, &st) != 0) + return -1; + + return st.st_blksize; +} + +/* Returns true if 2 fds refer to the same file. 
+ If any fd is invalid, return false. */ +static inline bool +fds_are_same_file(int fd1, int fd2) +{ + struct stat st1, st2; + + return (fstat(fd1, &st1) == 0 && fstat(fd2, &st2) == 0 && + st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); +} + +dma_controller_t * +dma_controller_create(int max_regions) +{ + dma_controller_t *dma; + + dma = malloc(offsetof(dma_controller_t, regions) + + max_regions * sizeof(dma->regions[0])); + + if (dma == NULL) { + return dma; + } + + dma->max_regions = max_regions; + dma->nregions = 0; + memset(dma->regions, 0, max_regions * sizeof(dma->regions[0])); + + return dma; +} + +static void +_dma_controller_do_remove_region(dma_memory_region_t * const region) +{ + assert(region); +#if DMA_MAP_FAST_IMPL + dma_unmap_region(region, region->virt_addr, region->size); +#endif + (void)close(region->fd); +} + +/* FIXME not thread safe */ +int +dma_controller_remove_region(dma_controller_t * dma, dma_addr_t dma_addr, + size_t size, int fd) +{ + int idx; + dma_memory_region_t *region; + + assert(dma); + + for (idx = 0; idx < dma->nregions; idx++) { + region = &dma->regions[idx]; + if (region->dma_addr == dma_addr && region->size == size && + fds_are_same_file(region->fd, fd)) { + _dma_controller_do_remove_region(region); + if (dma->nregions > 1) + memcpy(region, &dma->regions[dma->nregions - 1], + sizeof *region); + dma->nregions--; + return 0; + } + } + return -ENOENT; +} + +static inline void +dma_controller_remove_regions(lm_ctx_t * const ctx, + dma_controller_t * const dma) +{ + int i; + + assert(dma); + + for (i = 0; i < dma->nregions; i++) { + dma_memory_region_t *region = &dma->regions[i]; + + lm_log(ctx, LM_INF, "unmap vaddr=%lx IOVA=%lx\n", + region->virt_addr, region->dma_addr); + + _dma_controller_do_remove_region(region); + } +} + +void +dma_controller_destroy(lm_ctx_t * const ctx, dma_controller_t * dma) +{ + dma_controller_remove_regions(ctx, dma); + free(dma); +} + +int +dma_controller_add_region(lm_ctx_t * const lm_ctx, 
dma_controller_t * dma, + dma_addr_t dma_addr, size_t size, + int fd, off_t offset) +{ + int idx; + dma_memory_region_t *region; + int page_size; + + for (idx = 0; idx < dma->nregions; idx++) { + region = &dma->regions[idx]; + + /* First check if this is the same exact region. */ + if (region->dma_addr == dma_addr && region->size == size) { + if (offset != region->offset) { + lm_log(lm_ctx, LM_ERR, "bad offset for new DMA region %lx+%lx, " + "want=%d, existing=%d\n", + dma_addr, size, offset, region->offset); + goto err; + } + if (!fds_are_same_file(region->fd, fd)) { + /* + * Printing the file descriptors here doesn't really make + * sense as they can be different but actually pointing to + * the same file, however in the majority of cases we'll be + * using a single fd. + */ + lm_log(lm_ctx, LM_ERR, "bad fd=%d for new DMA region %lx-%lx, " + "existing fd=%d\n", fd, region->fd); + goto err; + } + return idx; + } + + /* Check for overlap, i.e. start of one region is within another. */ + if ((dma_addr >= region->dma_addr && + dma_addr < region->dma_addr + region->size) || + (region->dma_addr >= dma_addr && + region->dma_addr < dma_addr + size)) { + lm_log(lm_ctx, LM_INF, "new DMA region %lx+%lx overlaps with DMA " + "region %lx-%lx\n", dma_addr, size, region->dma_addr, + region->size); + goto err; + } + } + + if (dma->nregions == dma->max_regions) { + idx = dma->max_regions; + lm_log(lm_ctx, LM_ERR, "reached maxed regions\n"); + goto err; + } + + idx = dma->nregions; + region = &dma->regions[idx]; + + page_size = fd_get_blocksize(fd); + if (page_size < 0) { + lm_log(lm_ctx, LM_ERR, "bad page size %d\n", page_size); + goto err; + } + page_size = MAX(page_size, getpagesize()); + + region->dma_addr = dma_addr; + region->size = size; + region->page_size = page_size; + region->offset = offset; + + region->fd = dup(fd); // dup the fd to get our own private copy + if (region->fd < 0) { + lm_log(lm_ctx, LM_ERR, "failed to duplicate file descriptor: %s\n", + 
strerror(errno)); + goto err; + } +#if DMA_MAP_FAST_IMPL + region->virt_addr = dma_map_region(region, PROT_READ | PROT_WRITE, + 0, region->size); + if (region->virt_addr == MAP_FAILED) { + lm_log(lm_ctx, LM_ERR, "failed to memory map DMA region %lx-%lx: %s\n", + dma_addr, dma_addr + size, strerror(errno)); + close(region->fd); + goto err; + } +#endif + + dma->nregions++; + + return idx; + +err: + return -idx - 1; +} + +static inline void +mmap_round(size_t * offset, size_t * size, size_t page_size) +{ + size_t offset_orig = *offset; + *offset = ROUND_DOWN(offset_orig, page_size); + *size = ROUND_UP(offset_orig + *size, page_size) - *offset; +} + +void * +dma_map_region(dma_memory_region_t * region, int prot, + size_t offset, size_t len) +{ + size_t mmap_offset, mmap_size = len; + char *mmap_base; + + if (offset >= region->size || offset + len > region->size) { + return MAP_FAILED; + } + + offset += region->offset; + mmap_offset = offset; + mmap_round(&mmap_offset, &mmap_size, region->page_size); + + // Do the mmap. + mmap_base = mmap(NULL, mmap_size, prot, MAP_SHARED, + region->fd, mmap_offset); + if (mmap_base == MAP_FAILED) { + return mmap_base; + } + // Do not dump. + madvise(mmap_base, mmap_size, MADV_DONTDUMP); + + return mmap_base + (offset - mmap_offset); +} + +void +dma_unmap_region(dma_memory_region_t * region, void *virt_addr, size_t len) +{ + mmap_round((size_t *) & virt_addr, &len, region->page_size); + munmap(virt_addr, len); +} + +int +_dma_addr_sg_split(lm_ctx_t * const ctx, const dma_controller_t * dma, + dma_addr_t dma_addr, uint32_t len, + dma_scattergather_t * sg, int max_sg) +{ + int idx; + int cnt = 0; + bool found = true; // Whether the current region is found. 
+ + while (found && len > 0) { + found = false; + for (idx = 0; idx < dma->nregions; idx++) { + const dma_memory_region_t *const region = &dma->regions[idx]; + const dma_addr_t region_end = region->dma_addr + region->size; + + while (dma_addr >= region->dma_addr && dma_addr < region_end) { + size_t region_len = MIN(region_end - dma_addr, len); + + if (cnt < max_sg) { + sg[cnt].region = idx; + sg[cnt].offset = dma_addr - region->dma_addr; + sg[cnt].length = region_len; + } + + cnt++; + + // dma_addr found, may need to start from the top for the + // next dma_addr. + found = true; + dma_addr += region_len; + len -= region_len; + + if (len == 0) { + goto out; + } + } + } + } + +out: + if (!found) { + // There is still a region which was not found. + assert(len > 0); + cnt = -1; + } else if (cnt > max_sg) { + cnt = -cnt - 1; + } + return cnt; +} + +/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lib/dma.h b/lib/dma.h new file mode 100644 index 0000000..80afaec --- /dev/null +++ b/lib/dma.h @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2019 Nutanix Inc. All rights reserved. + * + * Authors: Mike Cui <cui@nutanix.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Nutanix nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + */ + +#ifndef DMA_DMA_H +#define DMA_DMA_H + +/* + * This library emulates a DMA controller for a device emulation application to + * perform DMA operations on a foreign memory space. + * + * Concepts: + * - A DMA controller has its own 64-bit DMA address space. + * - Foreign memory is made available to the DMA controller in linear chunks + * called memory regions. + * - Each memory region is backed by a file descriptor and + * is registered with the DMA controllers at a unique, non-overlapping + * linear span of the DMA address space. + * - To perform DMA, the application should first build a scatter-gather + * list (sglist) of dma_scattergather_t from DMA addresses. Then the sglist + * can be mapped using dma_map_sg() into the process's virtual address space + * as an iovec for direct access, and unmapped using dma_unmap_sg() when done. + * - dma_map_addr() and dma_unmap_addr() helper functions are provided + * for mapping DMA regions that can fit into one scatter-gather entry. + * + * This library can be compiled to function in two modes as defined by the + * following macros. 
+ * - DMA_MAP_FAST (default): Every region is mapped into the application's + * virtual address space at registration time with R/W permissions. + * dma_map_sg() ignores all protection bits and only does lookups and + * returns pointers to the previously mapped regions. dma_unmap_sg() is + * effectively a no-op. + * - DMA_MAP_PROTECTED: Every call to dma_map_sg() does mmap()s and + * dma_unmap_sg() does munmap()s. All permission bits are honored. This mode + * is obviously much slower if used in the fast path. It may be useful to + * have the exta protection if the fast path does not need direct virtual + * memory access to foreign memory and data is accessed using a different + * method (e.g. RDMA, vfio-iommu). It can also be useful in debugging to + * make sure we are not writing to guest memory that's readonly for the + * device. + */ + +#ifdef DMA_MAP_PROTECTED +#undef DMA_MAP_FAST +#define DMA_MAP_FAST_IMPL 0 +#else +#define DMA_MAP_FAST_IMPL 1 +#endif + +#include <assert.h> +#include <sys/types.h> +#include <sys/uio.h> +#include <sys/mman.h> +#include <stdint.h> +#include <stdlib.h> + +#include "muser.h" +#include "common.h" + +typedef struct { + dma_addr_t dma_addr; // DMA address of this region + size_t size; // Size of this region + int fd; // File descriptor to mmap + int page_size; // Page size of this fd + off_t offset; // File offset +#if DMA_MAP_FAST_IMPL + void *virt_addr; // Virtual address of this region +#endif +} dma_memory_region_t; + +typedef struct { + int max_regions; + int nregions; + dma_memory_region_t regions[0]; +} dma_controller_t; + +dma_controller_t *dma_controller_create(int max_regions); +void dma_controller_destroy(lm_ctx_t * const ctx, dma_controller_t * dma); + +/* Registers a new memory region. + * Returns: + * - On success, a non-negative region number + * - On failure, a negative integer (-x - 1) where x is the region number + * where this region would have been mapped to if the call could succeed + * (e.g. 
due to conflict with existing region). + */ +int dma_controller_add_region(lm_ctx_t * const ctx, dma_controller_t * dma, + dma_addr_t dma_addr, size_t size, + int fd, off_t offset); + +int dma_controller_remove_region(dma_controller_t * dma, dma_addr_t dma_addr, + size_t size, int fd); + +// Helper for dma_addr_to_sg() slow path. +int _dma_addr_sg_split(lm_ctx_t * const ctx, const dma_controller_t * dma, + dma_addr_t dma_addr, uint32_t len, + dma_scattergather_t * sg, int max_sg); + +/* Takes a linear dma address span and returns a sg list suitable for DMA. + * A single linear dma address span may need to be split into multiple + * scatter gather regions due to limitations of how memory can be mapped. + * + * Returns: + * - On success, number of scatter gather entries created. + * - On failure: + * -1 if the dma address span is invalid + * (-x - 1) if @max_sg is too small, where x is the number of sg entries + * necessary to complete this request. + */ +static inline int +dma_addr_to_sg(lm_ctx_t * const ctx, const dma_controller_t * dma, + dma_addr_t dma_addr, uint32_t len, + dma_scattergather_t * sg, int max_sg) +{ + static __thread int region_hint; + int cnt; + + const dma_memory_region_t *const region = &dma->regions[region_hint]; + const dma_addr_t region_end = region->dma_addr + region->size; + + // Fast path: single region. + if (likely(max_sg > 0 && len > 0 && + dma_addr >= region->dma_addr && dma_addr + len <= region_end)) { + sg->region = region_hint; + sg->offset = dma_addr - region->dma_addr; + sg->length = len; + return 1; + } + // Slow path: search through regions. 
+ cnt = _dma_addr_sg_split(ctx, dma, dma_addr, len, sg, max_sg); + if (likely(cnt > 0)) { + region_hint = sg->region; + } + return cnt; +} + +void *dma_map_region(dma_memory_region_t * region, int prot, + size_t offset, size_t len); + +void dma_unmap_region(dma_memory_region_t * region, + void *virt_addr, size_t len); + +static inline int +dma_map_sg(dma_controller_t * dma, int prot, + const dma_scattergather_t * sg, struct iovec *iov, int cnt) +{ + int i; + + for (i = 0; i < cnt; i++) { + dma_memory_region_t *const region = &dma->regions[sg[i].region]; + +#if DMA_MAP_FAST_IMPL + iov[i].iov_base = (char *)region->virt_addr + sg[i].offset; +#else + iov[i].iov_base = dma_map_region(region, prot, + sg[i].offset, sg[i].length); + if (iov[i].iov_base == MAP_FAILED) { + return -1; + } +#endif + iov[i].iov_len = sg[i].length; + } + + return 0; +} + +static inline void +dma_unmap_sg(dma_controller_t * dma, + const dma_scattergather_t * sg, struct iovec *iov, int cnt) +{ + int i; + + for (i = 0; i < cnt; i++) { + dma_memory_region_t *const region = &dma->regions[sg[i].region]; + if (!DMA_MAP_FAST_IMPL) { + dma_unmap_region(region, iov[i].iov_base, iov[i].iov_len); + } + } +} + +static inline void * +dma_map_addr(lm_ctx_t * const ctx, dma_controller_t * dma, int prot, + dma_addr_t dma_addr, uint32_t len) +{ + dma_scattergather_t sg; + struct iovec iov; + + if (dma_addr_to_sg(ctx, dma, dma_addr, len, &sg, 1) == 1 && + dma_map_sg(dma, prot, &sg, &iov, 1) == 0) { + return iov.iov_base; + } + + return NULL; +} + +static inline void +dma_unmap_addr(lm_ctx_t * const ctx, dma_controller_t * dma, + dma_addr_t dma_addr, uint32_t len, void *addr) +{ + dma_scattergather_t sg; + struct iovec iov = { + .iov_base = addr, + .iov_len = len, + }; + int r; + + r = dma_addr_to_sg(ctx, dma, dma_addr, len, &sg, 1); + assert(r == 1); + + dma_unmap_sg(dma, &sg, &iov, 1); +} + +#endif /* DMA_DMA_H */ + +/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lib/libmuser.c 
b/lib/libmuser.c new file mode 100644 index 0000000..ba016fe --- /dev/null +++ b/lib/libmuser.c @@ -0,0 +1,1063 @@ +/* + * Copyright (c) 2019 Nutanix Inc. All rights reserved. + * + * Authors: Thanos Makatos <thanos@nutanix.com> + * Swapnil Ingle <swapnil.ingle@nutanix.com> + * Felipe Franciosi <felipe@nutanix.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Nutanix nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. 
+ * + */ + +#define _GNU_SOURCE +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <unistd.h> +#include <sys/eventfd.h> +#include <sys/ioctl.h> +#include <assert.h> +#include <errno.h> +#include <stddef.h> +#include <sys/mman.h> +#include <stdarg.h> + +#include "../kmod/muser.h" +#include "muser.h" +#include "dma.h" + +typedef enum { + IRQ_NONE = 0, + IRQ_INTX, + IRQ_MSI, + IRQ_MSIX, +} irq_type_t; + +typedef struct { + irq_type_t type; /* irq type this device is using */ + int err_efd; /* eventfd for irq err */ + int req_efd; /* eventfd for irq req */ + uint32_t max_ivs; /* maximum number of ivs supported */ + int efds[0]; /* XXX must be last */ +} lm_irqs_t; + +/* + * Macro that ensures that a particular struct member is last. Doesn't work for + * flexible array members. + */ +#define MUST_BE_LAST(s, m, t) \ + _Static_assert(sizeof(s) - offsetof(s, m) == sizeof(t), \ + #t " " #m " must be last member in " #s) + +struct lm_ctx { + void *pvt; + dma_controller_t *dma; + int fd; + bool extended; + lm_fops_t fops; + lm_log_lvl_t log_lvl; + lm_log_fn_t *log; + lm_pci_info_t pci_info; + lm_pci_config_space_t *pci_config_space; + lm_irqs_t irqs; /* XXX must be last */ +}; +MUST_BE_LAST(struct lm_ctx, irqs, lm_irqs_t); + +#define LM_CTX_SIZE(irqs) (sizeof(lm_ctx_t) + sizeof(int) * irqs) +#define LM2VFIO_IRQT(type) (type - 1) + +void lm_log(const lm_ctx_t * const ctx, const lm_log_lvl_t lvl, + const char *const fmt, ...) 
+{ + va_list ap; + char buf[BUFSIZ]; + + assert(ctx); + + if (!ctx->log || lvl > ctx->log_lvl || !fmt) { + return; + } + + va_start(ap, fmt); + vsnprintf(buf, sizeof buf, fmt, ap); + va_end(ap); + ctx->log(ctx->pvt, buf); +} + +static long irqs_disable(lm_ctx_t * lm_ctx, uint32_t index) +{ + int *irq_efd = NULL; + uint32_t i; + + assert(lm_ctx != NULL); + assert(index < LM_DEV_NUM_IRQS); + + switch (index) { + case VFIO_PCI_INTX_IRQ_INDEX: + case VFIO_PCI_MSI_IRQ_INDEX: + case VFIO_PCI_MSIX_IRQ_INDEX: + lm_ctx->irqs.type = IRQ_NONE; + for (i = 0; i < lm_ctx->irqs.max_ivs; i++) { + if (lm_ctx->irqs.efds[i] >= 0) { + (void) close(lm_ctx->irqs.efds[i]); + lm_ctx->irqs.efds[i] = -1; + } + } + return 0; + case VFIO_PCI_ERR_IRQ_INDEX: + irq_efd = &lm_ctx->irqs.err_efd; + break; + case VFIO_PCI_REQ_IRQ_INDEX: + irq_efd = &lm_ctx->irqs.req_efd; + break; + } + + if (irq_efd != NULL) { + (void)close(*irq_efd); + *irq_efd = -1; + return 0; + } + + return -EINVAL; +} + +static int irqs_set_data_none(lm_ctx_t *lm_ctx, struct vfio_irq_set *irq_set) +{ + int efd, i; + long ret; + eventfd_t val; + + for (i = irq_set->start; i < irq_set->start + irq_set->count; i++) { + efd = lm_ctx->irqs.efds[i]; + if (efd >= 0) { + val = 1; + ret = eventfd_write(efd, val); + if (ret == -1) { + return -errno; + } + } + } + + return 0; +} + +static int +irqs_set_data_bool(lm_ctx_t *lm_ctx, struct vfio_irq_set *irq_set, void *data) +{ + uint8_t *d8; + int efd, i; + long ret; + eventfd_t val; + + assert(data != NULL); + for (i = irq_set->start, d8 = data; i < irq_set->start + irq_set->count; + i++, d8++) { + efd = lm_ctx->irqs.efds[i]; + if (efd >= 0 && *d8 == 1) { + val = 1; + ret = eventfd_write(efd, val); + if (ret == -1) { + return -errno; + } + } + } + + return 0; +} + +static int +irqs_set_data_eventfd(lm_ctx_t *lm_ctx, struct vfio_irq_set *irq_set, void *data) +{ + int32_t *d32; + int efd, i; + + assert(data != NULL); + for (i = irq_set->start, d32 = data; i < irq_set->start + irq_set->count; 
+ i++, d32++) { + efd = lm_ctx->irqs.efds[i]; + if (efd >= 0) { + (void) close(efd); + lm_ctx->irqs.efds[i] = -1; + } + if (*d32 >= 0) { + lm_ctx->irqs.efds[i] = *d32; + } + } + + return 0; +} + +static long +irqs_trigger(lm_ctx_t * lm_ctx, struct vfio_irq_set *irq_set, void *data) +{ + int err = 0; + + assert(lm_ctx != NULL); + assert(irq_set != NULL); + + if (irq_set->count == 0) { + return irqs_disable(lm_ctx, irq_set->index); + } + + switch (irq_set->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) { + case VFIO_IRQ_SET_DATA_NONE: + err = irqs_set_data_none(lm_ctx, irq_set); + break; + case VFIO_IRQ_SET_DATA_BOOL: + err = irqs_set_data_bool(lm_ctx, irq_set, data); + break; + case VFIO_IRQ_SET_DATA_EVENTFD: + err = irqs_set_data_eventfd(lm_ctx, irq_set, data); + break; + } + + return err; +} + +static long +dev_set_irqs_validate(lm_ctx_t *lm_ctx, struct vfio_irq_set *irq_set) +{ + lm_pci_info_t *pci_info = &lm_ctx->pci_info; + uint32_t a_type, d_type; + + assert(lm_ctx != NULL); + assert(irq_set != NULL); + + // Separate action and data types from flags. + a_type = (irq_set->flags & VFIO_IRQ_SET_ACTION_TYPE_MASK); + d_type = (irq_set->flags & VFIO_IRQ_SET_DATA_TYPE_MASK); + + // Ensure index is within bounds. + if (irq_set->index >= LM_DEV_NUM_IRQS) { + return -EINVAL; + } + + /* TODO make each condition a function */ + + // Only one of MASK/UNMASK/TRIGGER is valid. + if ((a_type != VFIO_IRQ_SET_ACTION_MASK) && + (a_type != VFIO_IRQ_SET_ACTION_UNMASK) && + (a_type != VFIO_IRQ_SET_ACTION_TRIGGER)) { + return -EINVAL; + } + // Only one of NONE/BOOL/EVENTFD is valid. + if ((d_type != VFIO_IRQ_SET_DATA_NONE) && + (d_type != VFIO_IRQ_SET_DATA_BOOL) && + (d_type != VFIO_IRQ_SET_DATA_EVENTFD)) { + return -EINVAL; + } + // Ensure irq_set's start and count are within bounds. + if ((irq_set->start >= pci_info->irq_count[irq_set->index]) || + (irq_set->start + irq_set->count > pci_info->irq_count[irq_set->index])) { + return -EINVAL; + } + // Only TRIGGER is valid for ERR/REQ. 
+ if (((irq_set->index == VFIO_PCI_ERR_IRQ_INDEX) || + (irq_set->index == VFIO_PCI_REQ_IRQ_INDEX)) && + (a_type != VFIO_IRQ_SET_ACTION_TRIGGER)) { + return -EINVAL; + } + // count == 0 is only valid with ACTION_TRIGGER and DATA_NONE. + if ((irq_set->count == 0) && ((a_type != VFIO_IRQ_SET_ACTION_TRIGGER) || + (d_type != VFIO_IRQ_SET_DATA_NONE))) { + return -EINVAL; + } + // If IRQs are set, ensure index matches what's enabled for the device. + if ((irq_set->count != 0) && (lm_ctx->irqs.type != IRQ_NONE) && + (irq_set->index != LM2VFIO_IRQT(lm_ctx->irqs.type))) { + return -EINVAL; + } + + return 0; +} + +static long +dev_set_irqs(lm_ctx_t * lm_ctx, struct vfio_irq_set *irq_set, void *data) +{ + long ret; + + assert(lm_ctx != NULL); + assert(irq_set != NULL); + + // Ensure irq_set is valid. + ret = dev_set_irqs_validate(lm_ctx, irq_set); + if (ret != 0) { + return ret; + } + + switch (irq_set->flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) { + case VFIO_IRQ_SET_ACTION_MASK: // fallthrough + case VFIO_IRQ_SET_ACTION_UNMASK: + // We're always edge-triggered without un/mask support. + return 0; + } + + return irqs_trigger(lm_ctx, irq_set, data); +} + +static long dev_get_irqinfo(lm_ctx_t * lm_ctx, struct vfio_irq_info *irq_info) +{ + assert(lm_ctx != NULL); + assert(irq_info != NULL); + lm_pci_info_t *pci_info = &lm_ctx->pci_info; + + // Ensure provided argsz is sufficiently big and index is within bounds. + if ((irq_info->argsz < sizeof(struct vfio_irq_info)) || + (irq_info->index >= LM_DEV_NUM_IRQS)) { + return -EINVAL; + } + + irq_info->count = pci_info->irq_count[irq_info->index]; + irq_info->flags = VFIO_IRQ_INFO_EVENTFD; + + return 0; +} + +static long +dev_get_reginfo(lm_ctx_t * lm_ctx, struct vfio_region_info *reg_info) +{ + assert(lm_ctx != NULL); + assert(reg_info != NULL); + lm_pci_info_t *pci_info = &lm_ctx->pci_info; + + // Ensure provided argsz is sufficiently big and index is within bounds. 
+ if ((reg_info->argsz < sizeof(struct vfio_region_info)) || + (reg_info->index >= LM_DEV_NUM_REGS)) { + return -EINVAL; + } + + reg_info->offset = pci_info->reg_info[reg_info->index].offset; + reg_info->flags = pci_info->reg_info[reg_info->index].flags; + reg_info->size = pci_info->reg_info[reg_info->index].size; + + lm_log(lm_ctx, LM_DBG, "region_info[%d]\n", reg_info->index); + dump_buffer(lm_ctx, "", (unsigned char *)reg_info, sizeof *reg_info); + + return 0; +} + +static long dev_get_info(struct vfio_device_info *dev_info) +{ + assert(dev_info != NULL); + + // Ensure provided argsz is sufficiently big. + if (dev_info->argsz < sizeof(struct vfio_device_info)) { + return -EINVAL; + } + + dev_info->flags = VFIO_DEVICE_FLAGS_PCI | VFIO_DEVICE_FLAGS_RESET; + dev_info->num_regions = LM_DEV_NUM_REGS; + dev_info->num_irqs = LM_DEV_NUM_IRQS; + + return 0; +} + +static long +do_muser_ioctl(lm_ctx_t * lm_ctx, struct muser_cmd_ioctl *cmd_ioctl, void *data) +{ + int err = -ENOTSUP; + + assert(lm_ctx != NULL); + switch (cmd_ioctl->vfio_cmd) { + case VFIO_DEVICE_GET_INFO: + err = dev_get_info(&cmd_ioctl->data.dev_info); + break; + case VFIO_DEVICE_GET_REGION_INFO: + err = dev_get_reginfo(lm_ctx, &cmd_ioctl->data.reg_info); + break; + case VFIO_DEVICE_GET_IRQ_INFO: + err = dev_get_irqinfo(lm_ctx, &cmd_ioctl->data.irq_info); + break; + case VFIO_DEVICE_SET_IRQS: + err = dev_set_irqs(lm_ctx, &cmd_ioctl->data.irq_set, data); + break; + case VFIO_DEVICE_RESET: + if (lm_ctx->fops.reset) { + return lm_ctx->fops.reset(lm_ctx->pvt); + } + } + + return err; +} + +static int muser_dma_unmap(lm_ctx_t * lm_ctx, struct muser_cmd *cmd) +{ + int err; + + lm_log(lm_ctx, LM_INF, "removing DMA region %lx-%lx\n", + cmd->mmap.request.start, cmd->mmap.request.end); + + if (lm_ctx->dma == NULL) { + lm_log(lm_ctx, LM_ERR, "DMA not initialized\n"); + cmd->mmap.response.addr = -1; + return -1; + } + + err = dma_controller_remove_region(lm_ctx->dma, + cmd->mmap.request.start, + cmd->mmap.request.end - 
+ cmd->mmap.request.start, lm_ctx->fd); + if (err != 0) { + lm_log(lm_ctx, LM_ERR, "failed to remove DMA region %lx-%lx: %s\n", + cmd->mmap.request.start, cmd->mmap.request.end, strerror(err)); + } + + cmd->mmap.response.addr = err; + + return err; +} + +static int muser_dma_map(lm_ctx_t * lm_ctx, struct muser_cmd *cmd) +{ + int err; + + lm_log(lm_ctx, LM_INF, "adding DMA region %lx-%lx\n", + cmd->mmap.request.start, cmd->mmap.request.end); + + if (lm_ctx->dma == NULL) { + lm_log(lm_ctx, LM_ERR, "DMA not initialized\n"); + cmd->mmap.response.addr = -1; + return -1; + } + + if (cmd->mmap.request.start >= cmd->mmap.request.end) { + lm_log(lm_ctx, LM_ERR, "bad DMA region %lx-%lx\n", + cmd->mmap.request.start, cmd->mmap.request.end); + cmd->mmap.response.addr = -1; + return -1; + } + err = dma_controller_add_region(lm_ctx, lm_ctx->dma, + cmd->mmap.request.start, + cmd->mmap.request.end - + cmd->mmap.request.start, lm_ctx->fd, 0); + if (err < 0) { + lm_log(lm_ctx, LM_ERR, "failed to add DMA region %lx-%lx: %d\n", + cmd->mmap.request.start, cmd->mmap.request.end, err); + cmd->mmap.response.addr = -1; + return -1; + } + + // TODO: Are we just abusing response.addr as a rc? + cmd->mmap.response.addr = 0; + + return 0; +} + +static int muser_mmap(lm_ctx_t * lm_ctx, struct muser_cmd *cmd) +{ + unsigned long addr; + unsigned long start = cmd->mmap.request.start; + unsigned long end = cmd->mmap.request.end; + unsigned long pgoff = cmd->mmap.request.pgoff; + + addr = lm_ctx->fops.mmap(lm_ctx->pvt, pgoff); + cmd->mmap.response.addr = addr; + + if ((void *)addr == MAP_FAILED) { + cmd->err = -1; + return -1; + } + + return 0; +} + +static int +post_read(lm_ctx_t * const lm_ctx, struct muser_cmd *const cmd, + char *const data, const size_t offset, ssize_t ret) +{ + if (ret != cmd->rw.count) { + /* FIXME shouldn't we still reply to the kernel in case of error? 
*/ + lm_log(lm_ctx, LM_ERR, "%s: bad fops read: %d/%d, %s\n", + __func__, ret, cmd->rw.count, strerror(errno)); + return ret; + } + + /* + * TODO the kernel will first copy the command and then will use the .buf + * pointer to copy the data. Does it make sense to use writev in order to + * get rid of the .buf member? THe 1st element of the iovec will be the + * command and the 2nd the data. + */ + cmd->rw.buf = data; + ret = write(lm_ctx->fd, cmd, sizeof(*cmd)); + if ((int)ret != sizeof(*cmd)) { + lm_log(lm_ctx, LM_ERR, "%s: bad muser write: %d/%d, %s\n", + __func__, ret, sizeof(*cmd), strerror(errno)); + } + return ret; +} + +int +lm_get_region(lm_ctx_t * const lm_ctx, const loff_t pos, const size_t count, + loff_t * const off) +{ + assert(lm_ctx); + assert(off); + lm_pci_info_t *pci_info = &lm_ctx->pci_info; + + int i; + + for (i = 0; i < LM_DEV_NUM_REGS; i++) { + const lm_reg_info_t * const reg_info = &pci_info->reg_info[i]; + if (pos >= reg_info->offset) { + if (pos - reg_info->offset + count <= reg_info->size) { + *off = pos - reg_info->offset; + return i; + } + } + } + return -ENOENT; +} + +static ssize_t +do_access(lm_ctx_t * const lm_ctx, char * const buf, size_t count, loff_t pos, + const bool is_write) +{ + int idx; + loff_t offset; + int ret = -EINVAL; + lm_pci_info_t *pci_info; + + assert(lm_ctx != NULL); + assert(buf != NULL); + assert(count > 0); + + pci_info = &lm_ctx->pci_info; + idx = lm_get_region(lm_ctx, pos, count, &offset); + if (idx < 0) { + lm_log(lm_ctx, LM_ERR, "invalid region %d\n", idx); + return idx; + } + + /* + * TODO we should check at device registration time that all necessary + * callbacks are there in order to avoid having to check at runtime + */ + switch (idx) { + case LM_DEV_BAR0_REG_IDX ... 
LM_DEV_BAR5_REG_IDX: + if (pci_info->bar_fn) + return pci_info->bar_fn(lm_ctx->pvt, idx, buf, count, offset, is_write); + break; + case LM_DEV_ROM_REG_IDX: + if (pci_info->rom_fn) + return pci_info->rom_fn(lm_ctx->pvt, buf, count, offset, is_write); + break; + case LM_DEV_CFG_REG_IDX: + if (pci_info->pci_config_fn) + return pci_info->pci_config_fn(lm_ctx->pvt, buf, count, offset, + is_write); + break; + case LM_DEV_VGA_REG_IDX: + if (pci_info->vga_fn) + return pci_info->vga_fn(lm_ctx->pvt, buf, count, offset, is_write); + break; + default: + lm_log(lm_ctx, LM_ERR, "bad region %d\n", idx); + return ret; + } + + if (is_write && lm_ctx->fops.write) { + ret = lm_ctx->fops.write(lm_ctx->pvt, idx, buf, count, pos); + } else if (lm_ctx->fops.read) { + ret = lm_ctx->fops.read(lm_ctx->pvt, idx, buf, count, pos); + } else { + lm_log(lm_ctx, LM_ERR, "no R/W callback, region %d, %x@%lx\n", + idx, count, pos); + } + + return ret; +} + +/* + * TODO function name same lm_access_t, fix + */ +ssize_t +lm_access(lm_ctx_t * const lm_ctx, char *buf, size_t count, + loff_t * const ppos, const bool is_write) +{ + unsigned int done = 0; + int ret; + + assert(lm_ctx != NULL); + /* buf and ppos can be NULL if count is 0 */ + + while (count) { + size_t size; + if (count >= 8 && !(*ppos % 8)) { + size = 8; + } else if (count >= 4 && !(*ppos % 4)) { + size = 4; + } else if (count >= 2 && !(*ppos % 2)) { + size = 2; + } else { + size = 1; + } + ret = do_access(lm_ctx, buf, size, *ppos, is_write); + if (ret <= 0) { + lm_log(lm_ctx, LM_ERR, "failed to %s %lx@%llx: %s\n", + is_write ? "write" : "read", *ppos, size, strerror(-ret)); + return -EFAULT; + } + count -= size; + done += size; + *ppos += size; + buf += size; + } + return done; +} + + +static inline int +muser_access(lm_ctx_t * const lm_ctx, struct muser_cmd *const cmd, + const bool is_write) +{ + char *data; + int err; + unsigned int i; + size_t count = 0; + ssize_t ret; + + /* TODO how big do we expect count to be? 
Can we use alloca(3) instead? */ + data = calloc(1, cmd->rw.count); + if (data == NULL) { + lm_log(lm_ctx, LM_ERR, "failed to allocate memory\n"); + return -1; + } + + lm_log(lm_ctx, LM_DBG, "%s %x@%lx\n", is_write ? "W" : "R", cmd->rw.count, + cmd->rw.pos); + + /* copy data to be written from kernel to user space */ + if (is_write) { + err = read(lm_ctx->fd, data, cmd->rw.count); + /* + * FIXME this is wrong, we should be checking for + * err != cmd->rw.count + */ + if (err < 0) { + lm_log(lm_ctx, LM_ERR, "failed to read from kernel: %s\n", + strerror(errno)); + goto out; + } + err = 0; + dump_buffer(lm_ctx, "buffer write", data, cmd->rw.count); + } + + count = cmd->rw.count; + cmd->err = muser_pci_hdr_access(lm_ctx, &cmd->rw.count, &cmd->rw.pos, + is_write, data); + if (cmd->err) { + lm_log(lm_ctx, LM_ERR, "failed to access PCI header: %d\n", cmd->err); + } + count -= cmd->rw.count; + ret = lm_access(lm_ctx, data + count, cmd->rw.count, &cmd->rw.pos, + is_write); + if (!is_write) { + err = post_read(lm_ctx, cmd, data, count, ret); + dump_buffer(lm_ctx, "buffer read", data, cmd->rw.count); + } + +out: + free(data); + + return err; +} + +static int +muser_ioctl(lm_ctx_t * lm_ctx, struct muser_cmd *cmd) +{ + void *data = NULL; + size_t size = 0; + int ret; + + /* TODO make this a function that returns the size */ + if (cmd->ioctl.vfio_cmd == VFIO_DEVICE_SET_IRQS) { + uint32_t flags = cmd->ioctl.data.irq_set.flags; + switch ((flags & VFIO_IRQ_SET_DATA_TYPE_MASK)) { + case VFIO_IRQ_SET_DATA_EVENTFD: + size = sizeof(int32_t) * cmd->ioctl.data.irq_set.count; + break; + case VFIO_IRQ_SET_DATA_BOOL: + size = sizeof(uint8_t) * cmd->ioctl.data.irq_set.count; + break; + } + } + + if (size != 0) { + data = calloc(1, size); + if (data == NULL) { +#ifdef DEBUG + perror("calloc"); +#endif + return -1; + } + + ret = read(lm_ctx->fd, data, size); + if (ret < 0) { +#ifdef DEBUG + perror("read failed"); +#endif + goto out; + } + } + + ret = (int)do_muser_ioctl(lm_ctx, &cmd->ioctl, 
data); + +out: + + free(data); + return ret; +} + +static int drive_loop(lm_ctx_t *lm_ctx) +{ + struct muser_cmd cmd = { 0 }; + int err; + size_t size; + unsigned int i; + + do { + err = ioctl(lm_ctx->fd, MUSER_DEV_CMD_WAIT, &cmd); + if (err < 0) { + return err; + } + + switch (cmd.type) { + case MUSER_IOCTL: + err = muser_ioctl(lm_ctx, &cmd); + break; + case MUSER_READ: + case MUSER_WRITE: + err = muser_access(lm_ctx, &cmd, cmd.type == MUSER_WRITE); + break; + case MUSER_MMAP: + err = muser_mmap(lm_ctx, &cmd); + break; + case MUSER_DMA_MMAP: + err = muser_dma_map(lm_ctx, &cmd); + break; + case MUSER_DMA_MUNMAP: + err = muser_dma_unmap(lm_ctx, &cmd); + break; + default: + lm_log(lm_ctx, LM_ERR, "bad command %d\n", cmd.type); + continue; + } + cmd.err = err; + err = ioctl(lm_ctx->fd, MUSER_DEV_CMD_DONE, &cmd); + if (err < 0) { + lm_log(lm_ctx, LM_ERR, "failed to complete command: %s\n", + strerror(errno)); + } + // TODO: Figure out a clean way to get out of the loop. + } while (1); + + return err; +} + +int +lm_ctx_drive(lm_ctx_t * lm_ctx) +{ + + if (lm_ctx == NULL) { + errno = EINVAL; + return -1; + } + + return drive_loop(lm_ctx); +} + +static int +dev_detach(int dev_fd) +{ + return close(dev_fd); +} + +static int +dev_attach(const char *uuid) +{ + char *path; + int dev_fd; + int err; + + err = asprintf(&path, "/dev/" MUSER_DEVNODE "/%s", uuid); + if (err != (int)(strlen(MUSER_DEVNODE) + strlen(uuid) + 6)) { + return -1; + } + + dev_fd = open(path, O_RDWR); + + free(path); + + return dev_fd; +} + +void * +lm_mmap(lm_ctx_t * lm_ctx, size_t length, off_t offset) +{ + off_t lm_off; + + if ((lm_ctx == NULL) || (length == 0) || !PAGE_ALIGNED(offset)) { + errno = EINVAL; + return MAP_FAILED; + } + + lm_off = offset | BIT(63); + return mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED, + lm_ctx->fd, lm_off); +} + +int +lm_irq_trigger(lm_ctx_t * lm_ctx, uint32_t vector) +{ + eventfd_t val = 1; + + if ((lm_ctx == NULL) || (vector >= lm_ctx->irqs.max_ivs)) { + errno = 
EINVAL; + return -1; + } + + if (lm_ctx->irqs.efds[vector] == -1) { + errno = ENOENT; + return -1; + } + + return eventfd_write(lm_ctx->irqs.efds[vector], val); +} + +void +lm_ctx_destroy(lm_ctx_t * lm_ctx) +{ + if (lm_ctx == NULL) { + return; + } + + free(lm_ctx->pci_config_space); + dev_detach(lm_ctx->fd); + if (lm_ctx->dma != NULL) { + dma_controller_destroy(lm_ctx, lm_ctx->dma); + } + free(lm_ctx); + // FIXME: Maybe close any open irq efds? Unmap stuff? +} + +static void +init_pci_hdr(lm_pci_hdr_t * const hdr, const lm_pci_hdr_id_t * const id, + const lm_pci_hdr_cc_t * const cc) +{ + assert(hdr); + assert(id); + assert(cc); + + hdr->id = *id; + hdr->cc = *cc; + + hdr->ss.vid = hdr->id.vid; + hdr->ss.sid = hdr->id.did; +} + +lm_ctx_t * +lm_ctx_create(lm_dev_info_t * const dev_info) +{ + lm_ctx_t *lm_ctx; + uint32_t max_ivs = 0; + uint32_t i; + int err = 0; + size_t size; + + if (dev_info == NULL) { + err = EINVAL; + goto out; + } + + for (i = 0; i < LM_DEV_NUM_IRQS; i++) { + if (max_ivs < dev_info->pci_info.irq_count[i]) { + max_ivs = dev_info->pci_info.irq_count[i]; + } + } + + lm_ctx = calloc(1, LM_CTX_SIZE(max_ivs)); + if (lm_ctx == NULL) { + err = errno; + goto out; + } + + memcpy(&lm_ctx->pci_info, &dev_info->pci_info, sizeof(lm_pci_info_t)); + + lm_ctx->fd = dev_attach(dev_info->uuid); + if (lm_ctx->fd == -1) { + err = errno; + goto out; + } + + if (dev_info->nr_dma_regions > 0) { + lm_ctx->dma = dma_controller_create(dev_info->nr_dma_regions); + if (lm_ctx->dma == NULL) { + err = errno; + goto out; + } + } + + lm_ctx->pci_info.irq_count[LM_DEV_ERR_IRQ_IDX] = 1; + lm_ctx->pci_info.irq_count[LM_DEV_REQ_IRQ_IDX] = 1; + + lm_ctx->extended = dev_info->extended; + if (lm_ctx->extended) { + size = PCI_EXTENDED_CONFIG_SPACE_SIZEOF; + } else { + size = PCI_CONFIG_SPACE_SIZEOF; + } + lm_ctx->pci_config_space = calloc(PCI_EXTENDED_CONFIG_SPACE_SIZEOF, 1); + if (!lm_ctx->pci_config_space) { + err = errno; + goto out; + } + + 
init_pci_hdr(&lm_ctx->pci_config_space->hdr, &dev_info->id, &dev_info->cc); + for (i = 0; i < ARRAY_SIZE(lm_ctx->pci_config_space->hdr.bars); i++) { + if ((dev_info->pci_info.reg_info[i].flags & LM_REG_FLAG_MEM) == 0) { + lm_ctx->pci_config_space->hdr.bars[i].io.region_type |= 0x1; + } + } + + lm_ctx->fops = dev_info->fops; + lm_ctx->pvt = dev_info->pvt; + + for (i = 0; i < max_ivs; i++) { + lm_ctx->irqs.efds[i] = -1; + } + lm_ctx->irqs.err_efd = -1; + lm_ctx->irqs.req_efd = -1; + lm_ctx->irqs.type = IRQ_NONE; + lm_ctx->irqs.max_ivs = max_ivs; + + lm_ctx->log = dev_info->log; + lm_ctx->log_lvl = dev_info->log_lvl; + + lm_ctx->pci_info.bar_fn = dev_info->pci_info.bar_fn; + lm_ctx->pci_info.rom_fn = dev_info->pci_info.rom_fn; + lm_ctx->pci_info.pci_config_fn = dev_info->pci_info.pci_config_fn; + lm_ctx->pci_info.vga_fn = dev_info->pci_info.vga_fn; + +out: + if (err) { + if (lm_ctx) { + dev_detach(lm_ctx->fd); + free(lm_ctx->pci_config_space); + free(lm_ctx); + lm_ctx = NULL; + } + errno = err; + } + return lm_ctx; +} + +void +dump_buffer(lm_ctx_t const *const lm_ctx, char const *const prefix, + unsigned char const *const buf, const uint32_t count) +{ +#ifdef DEBUG + int i; + const size_t bytes_per_line = 0x8; + + if (strcmp(prefix, "")) { + lm_log(lm_ctx, LM_DBG, "%s\n", prefix); + } + for (i = 0; i < (int)count; i++) { + if (i % bytes_per_line != 0) { + lm_log(lm_ctx, LM_DBG, " "); + } + /* TODO valgrind emits a warning if count is 1 */ + lm_log(lm_ctx, LM_DBG, "0x%02x", *(buf + i)); + if ((i + 1) % bytes_per_line == 0) { + lm_log(lm_ctx, LM_DBG, "\n"); + } + } + if (i % bytes_per_line != 0) { + lm_log(lm_ctx, LM_DBG, "\n"); + } +#endif +} + +/* + * Returns a pointer to the standard part of the PCI configuration space. + */ +inline lm_pci_config_space_t * +lm_get_pci_config_space(lm_ctx_t * const lm_ctx) +{ + assert(lm_ctx != NULL); + return lm_ctx->pci_config_space; +} + +/* + * Returns a pointer to the non-standard part of the PCI configuration space. 
+ */ +inline uint8_t * +lm_get_pci_non_std_config_space(lm_ctx_t * const lm_ctx) +{ + assert(lm_ctx != NULL); + return (uint8_t *) & lm_ctx->pci_config_space->non_std; +} + +inline lm_reg_info_t * +lm_get_region_info(lm_ctx_t * const lm_ctx) +{ + assert(lm_ctx != NULL); + return lm_ctx->pci_info.reg_info; +} + +inline int +lm_addr_to_sg(lm_ctx_t * const lm_ctx, dma_addr_t dma_addr, + uint32_t len, dma_scattergather_t * sg, int max_sg) +{ + return dma_addr_to_sg(lm_ctx, lm_ctx->dma, dma_addr, len, sg, max_sg); +} + +inline int +lm_map_sg(lm_ctx_t * const lm_ctx, int prot, + const dma_scattergather_t * sg, struct iovec *iov, int cnt) +{ + return dma_map_sg(lm_ctx->dma, prot, sg, iov, cnt); +} + +inline void +lm_unmap_sg(lm_ctx_t * const lm_ctx, const dma_scattergather_t * sg, + struct iovec *iov, int cnt) +{ + return dma_unmap_sg(lm_ctx->dma, sg, iov, cnt); +} + +int +lm_ctx_run(lm_ctx_t * const lm_ctx) +{ + int ret = lm_ctx_drive(lm_ctx); + + lm_ctx_destroy(lm_ctx); + return ret; +} + +/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lib/libmuser_pci.c b/lib/libmuser_pci.c new file mode 100644 index 0000000..df45336 --- /dev/null +++ b/lib/libmuser_pci.c @@ -0,0 +1,311 @@ +/* + * Copyright (c) 2019 Nutanix Inc. All rights reserved. + * + * Authors: Thanos Makatos <thanos@nutanix.com> + * Swapnil Ingle <swapnil.ingle@nutanix.com> + * Felipe Franciosi <felipe@nutanix.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Nutanix nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + */ + +#include <stdio.h> +#include <assert.h> +#include <string.h> +#include <sys/param.h> +#include <errno.h> + +#include <linux/pci_regs.h> +#include <linux/vfio.h> + +#include "muser.h" +#include "pci.h" +#include "common.h" + +static inline void +muser_pci_hdr_write_bar(lm_ctx_t * const lm_ctx, const uint16_t bar_index, + const char *const buf) +{ + uint32_t cfg_addr; + uint32_t *bar; + unsigned long mask; + lm_reg_info_t *reg_info = lm_get_region_info(lm_ctx); + + assert(lm_ctx); + + if (reg_info[bar_index].size == 0) { + return; + } + + bar = (uint32_t *) & lm_get_pci_config_space(lm_ctx)->hdr.bars[bar_index]; + cfg_addr = *(uint32_t *) buf; + + lm_log(lm_ctx, LM_DBG, "BAR%d addr 0x%x\n", bar_index, cfg_addr); + + if (cfg_addr == 0xffffffff) { + cfg_addr = ~(reg_info[bar_index].size) + 1; + } + + if ((reg_info[bar_index].flags & LM_REG_FLAG_MEM)) { + mask = PCI_BASE_ADDRESS_MEM_MASK; + } else { + mask = PCI_BASE_ADDRESS_IO_MASK; + } + cfg_addr |= (*bar & ~mask); + + *bar = htole32(cfg_addr); +} + 
+#define BAR_INDEX(offset) ((offset - PCI_BASE_ADDRESS_0) >> 2) + +static int +handle_command_write(lm_ctx_t * const ctx, lm_pci_config_space_t * const pci, + const char * const buf, const size_t count) +{ + uint16_t v; + + assert(ctx); + + if (count != 2) { + lm_log(ctx, LM_ERR, "bad write command size %d\n", count); + return -EINVAL; + } + + assert(pci); + assert(buf); + + v = *(uint16_t*)buf; + + if ((v & PCI_COMMAND_IO) == PCI_COMMAND_IO) { + if (!pci->hdr.cmd.iose) { + pci->hdr.cmd.iose = 0x1; + lm_log(ctx, LM_INF, "I/O space enabled\n"); + } + v &= ~PCI_COMMAND_IO; + } else { + if (pci->hdr.cmd.iose) { + pci->hdr.cmd.iose = 0x0; + lm_log(ctx, LM_INF, "I/O space disabled\n"); + } + } + + if ((v & PCI_COMMAND_MEMORY) == PCI_COMMAND_MEMORY) { + if (!pci->hdr.cmd.mse) { + pci->hdr.cmd.mse = 0x1; + lm_log(ctx, LM_INF, "memory space enabled\n"); + } + v &= ~PCI_COMMAND_MEMORY; + } else { + if (pci->hdr.cmd.mse) { + pci->hdr.cmd.mse = 0x0; + lm_log(ctx, LM_INF, "memory space disabled\n"); + } + } + + if ((v & PCI_COMMAND_MASTER) == PCI_COMMAND_MASTER) { + if (!pci->hdr.cmd.bme) { + pci->hdr.cmd.bme = 0x1; + lm_log(ctx, LM_INF, "bus master enabled\n"); + } + v &= ~PCI_COMMAND_MASTER; + } else { + if (pci->hdr.cmd.bme) { + pci->hdr.cmd.bme = 0x0; + lm_log(ctx, LM_INF, "bus master disabled\n"); + } + } + + if ((v & PCI_COMMAND_SERR) == PCI_COMMAND_SERR) { + if (!pci->hdr.cmd.see) { + pci->hdr.cmd.see = 0x1; + lm_log(ctx, LM_INF, "SERR# enabled\n"); + } + v &= ~PCI_COMMAND_SERR; + } else { + if (pci->hdr.cmd.see) { + pci->hdr.cmd.see = 0x0; + lm_log(ctx, LM_INF, "SERR# disabled\n"); + } + } + + if ((v & PCI_COMMAND_INTX_DISABLE) == PCI_COMMAND_INTX_DISABLE) { + if (!pci->hdr.cmd.id) { + pci->hdr.cmd.id = 0x1; + lm_log(ctx, LM_INF, "INTx emulation enabled\n"); + } + v &= ~PCI_COMMAND_INTX_DISABLE; + } else { + if (pci->hdr.cmd.id) { + pci->hdr.cmd.id = 0x0; + lm_log(ctx, LM_INF, "INTx emulation disabled\n"); + } + } + + if (v) { + lm_log(ctx, LM_ERR, "unconsumed command 
flags %x\n", v); + return -EINVAL; + } + + return 0; +} + +static int +handle_erom_write(lm_ctx_t * const ctx, lm_pci_config_space_t * const pci, + const char *const buf, const size_t count) +{ + uint32_t v; + + assert(ctx); + assert(pci); + + if (count != 0x4) { + lm_log(ctx, LM_ERR, "bad EROM count %d\n", count); + return -EINVAL; + } + v = *(uint32_t*)buf; + + if (v == (uint32_t)PCI_ROM_ADDRESS_MASK) { + lm_log(ctx, LM_INF, "write mask to EROM ignored\n"); + } else if (v == 0) { + lm_log(ctx, LM_INF, "cleared EROM\n"); + pci->hdr.erom = 0; + } else if (v == ~PCI_ROM_ADDRESS_ENABLE) { + lm_log(ctx, LM_INF, "EROM disable ignored\n"); + } else { + lm_log(ctx, LM_ERR, "bad write to EROM 0x%x bytes\n", v); + return -EINVAL; + } + return 0; +} + +static inline int +muser_pci_hdr_write(lm_ctx_t * const lm_ctx, const uint16_t offset, + const char *const buf, const size_t count) +{ + uint32_t *bar; + lm_pci_config_space_t *pci; + int ret = 0; + + assert(lm_ctx); + assert(buf); + + pci = lm_get_pci_config_space(lm_ctx); + + switch (offset) { + case PCI_COMMAND: + ret = handle_command_write(lm_ctx, pci, buf, count); + break; + case PCI_STATUS: + lm_log(lm_ctx, LM_INF, "write to status ignored\n"); + break; + case PCI_INTERRUPT_PIN: + lm_log(lm_ctx, LM_ERR, "attempt to write read-only field IPIN\n"); + ret = -EINVAL; + break; + case PCI_INTERRUPT_LINE: + pci->hdr.intr.iline = buf[0]; + break; + case PCI_LATENCY_TIMER: + pci->hdr.mlt = (uint8_t)buf[0]; + lm_log(lm_ctx, LM_INF, "set to latency timer to %hhx\n", pci->hdr.mlt); + break; + case PCI_BASE_ADDRESS_0: + case PCI_BASE_ADDRESS_1: + case PCI_BASE_ADDRESS_2: + case PCI_BASE_ADDRESS_3: + case PCI_BASE_ADDRESS_4: + case PCI_BASE_ADDRESS_5: + muser_pci_hdr_write_bar(lm_ctx, BAR_INDEX(offset), buf); + break; + case PCI_ROM_ADDRESS: + ret = handle_erom_write(lm_ctx, pci, buf, count); + break; + default: + lm_log(lm_ctx, LM_INF, "PCI config write %x@%x not handled\n", + count, offset); + ret = -EINVAL; + } + + 
dump_buffer(lm_ctx, "PCI header", pci->hdr.raw, 0xff); + + return ret; +} + +/* + * @pci_hdr: the PCI header + * @reg_info: region info + * @rw: the command + * @write: whether this is a PCI header write + * @count: output parameter that receives the number of bytes read/written + */ +static inline int +muser_do_pci_hdr_access(lm_ctx_t * const lm_ctx, size_t * const count, + loff_t * const pos, const bool is_write, + unsigned char *const buf) +{ + size_t _count; + loff_t _pos; + int err = 0; + + assert(lm_ctx); + assert(count); + assert(pos); + assert(buf); + + _pos = *pos - lm_get_region_info(lm_ctx)[LM_DEV_CFG_REG_IDX].offset; + _count = MIN(*count, PCI_STD_HEADER_SIZEOF - _pos); + + if (is_write) { + err = muser_pci_hdr_write(lm_ctx, _pos, buf, _count); + } else { + memcpy(buf, lm_get_pci_config_space(lm_ctx)->hdr.raw + _pos, _count); + } + *pos += _count; + *count -= _count; + return err; +} + +static inline bool +muser_is_pci_hdr_access(const lm_reg_info_t * const reg_info, const loff_t pos) +{ + const off_t off = (loff_t) reg_info[LM_DEV_CFG_REG_IDX].offset; + return pos - off >= 0 && pos - off < PCI_STD_HEADER_SIZEOF; +} + +int +muser_pci_hdr_access(lm_ctx_t * const lm_ctx, size_t * const count, + loff_t * const pos, const bool is_write, + unsigned char *const buf) +{ + assert(lm_ctx); + assert(count); + assert(pos); + + if (!muser_is_pci_hdr_access(lm_get_region_info(lm_ctx), *pos)) { + return 0; + } + return muser_do_pci_hdr_access(lm_ctx, count, pos, is_write, buf); +} + +/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lib/msicap.h b/lib/msicap.h new file mode 100644 index 0000000..bfcf1cd --- /dev/null +++ b/lib/msicap.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2019 Nutanix Inc. All rights reserved. 
+ * + * Authors: Thanos Makatos <thanos@nutanix.com> + * Swapnil Ingle <swapnil.ingle@nutanix.com> + * Felipe Franciosi <felipe@nutanix.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Nutanix nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. 
+ * + */ + +struct mid { + unsigned int next:8; + unsigned int cid:8; +} __attribute__ ((packed)); +_Static_assert(sizeof(struct mid) == 0x2, "bad MID size"); + +struct mc { + unsigned int msie:1; + unsigned int mmc:3; + unsigned int mme:3; + unsigned int c64:1; + unsigned int pvm:1; + unsigned int res1:7; +} __attribute__ ((packed)); +_Static_assert(sizeof(struct mc) == 0x2, "bad MC size"); + +struct ma { + unsigned int res1:2; + unsigned int addr:30; +} __attribute__ ((packed)); +_Static_assert(sizeof(struct ma) == 0x4, "bad MA size"); + +struct msicap { + struct mid mid; + struct mc mc; + struct ma ma; + uint32_t mua; + uint16_t md; + uint16_t padding; + uint32_t mmask; + uint32_t mpend; +} __attribute__ ((packed)); +_Static_assert(sizeof(struct msicap) == 0x18, "bad MSICAP size"); + +/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lib/muser.h b/lib/muser.h new file mode 100644 index 0000000..a844f5c --- /dev/null +++ b/lib/muser.h @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2019 Nutanix Inc. All rights reserved. + * + * Authors: Thanos Makatos <thanos@nutanix.com> + * Swapnil Ingle <swapnil.ingle@nutanix.com> + * Felipe Franciosi <felipe@nutanix.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Nutanix nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + */ + +#ifndef LIB_MUSER_H +#define LIB_MUSER_H + +#include <stdint.h> +#include <sys/uio.h> +#include <unistd.h> + +#include "pci.h" + +/** + * lm_fops_t - driver callbacks + * + * @read: read device configuration space + * @write: write device configuration space + * @mmap: mmap device configuration space + * @reset: reset the device + */ +typedef struct { + ssize_t (*read) (void *pvt, const int index, char *buf, size_t count, + loff_t pos); + ssize_t (*write) (void *pvt, const int index, char *buf, size_t count, + loff_t pos); + unsigned long (*mmap) (void *pvt, unsigned long pgoff); + int (*reset) (void *pvt); +} lm_fops_t; + + +/** + * Callback function signatures for each regions. + * + * @lm_bar_access_t: typedef for BAR access function. + * @lm_non_bar_access_t: typedef for non-BAR(rom, pci config, + * vga) access functions. 
+ */ +typedef ssize_t (lm_bar_access_t) (void *pvt, const int region_index, + char * const buf, size_t count, + loff_t offset, const bool is_write); +typedef ssize_t (lm_non_bar_access_t) (void *pvt, char * const buf, + size_t count, loff_t offset, + const bool is_write); +typedef struct { + uint32_t irq_count[LM_DEV_NUM_IRQS]; + lm_reg_info_t reg_info[LM_DEV_NUM_REGS]; + + /* Optional PCI region access callbacks. */ + lm_bar_access_t *bar_fn; + lm_non_bar_access_t *rom_fn; + lm_non_bar_access_t *pci_config_fn; + lm_non_bar_access_t *vga_fn; +} lm_pci_info_t; + +/** + * Callback function signature for log function + * + * @lm_log_fn_t: typedef for log function. + */ +typedef void (lm_log_fn_t) (void *pvt, const char *const msg); + +/** + * Device information structure, used to create the lm_ctx. + * To be filled and passed to lm_ctx_run() + */ +typedef struct { + char *uuid; + void *pvt; + /* + * whether an extended PCI configuration space should be created + */ + bool extended; + int nr_dma_regions; + lm_log_fn_t *log; + lm_log_lvl_t log_lvl; + lm_fops_t fops; + lm_pci_hdr_id_t id; + lm_pci_hdr_cc_t cc; + lm_pci_info_t pci_info; +} lm_dev_info_t; + +/** + * Creates libmuser context. + * + * Arguments: + * @dev_info: device information used to create the context. + */ +lm_ctx_t *lm_ctx_create(lm_dev_info_t * dev_info); + +/** + * Destroys libmuser context. + * + * Arguments: + * @lm_ctx: libmuser context to destroy. + */ +void lm_ctx_destroy(lm_ctx_t * lm_ctx); + +/** + * Once the lm_ctx is configured lm_ctx_drive() drives it. This function waits + * for commands comming from muser.ko and then processes it.. + * + * Arguments: + * @lm_ctx: libmuser context to drive. + */ + +int lm_ctx_drive(lm_ctx_t * lm_ctx); + + +/** + * Creates mapping of BAR's into the callers vmem. It should be called from + * lm_fops_t->mmap. + * + * Arguments: + * @lm_ctx: libmuser context to create mapping from. 
+ */
+void *lm_mmap(lm_ctx_t * lm_ctx, size_t length, off_t offset);
+
+/**
+ * Trigger interrupt.
+ *
+ * Arguments:
+ * @lm_ctx: libmuser context to trigger interrupt.
+ * @vector: vector to trigger interrupt on.
+ */
+int lm_irq_trigger(lm_ctx_t * lm_ctx, uint32_t vector);
+
+/* Helper functions */
+
+int lm_ctx_run(lm_ctx_t * const ctx);
+
+uint8_t *lm_get_pci_non_std_config_space(lm_ctx_t * const lm_ctx);
+
+int lm_addr_to_sg(lm_ctx_t * const ctx, dma_addr_t dma_addr, uint32_t len,
+                  dma_scattergather_t * sg, int max_sg);
+
+int
+lm_map_sg(lm_ctx_t * const ctx, int prot, const dma_scattergather_t * sg,
+          struct iovec *iov, int cnt);
+
+void
+lm_unmap_sg(lm_ctx_t * const ctx, const dma_scattergather_t * sg,
+            struct iovec *iov, int cnt);
+
+int
+lm_get_region(lm_ctx_t * const ctx, const loff_t pos,
+              const size_t count, loff_t * const off);
+
+#ifdef DEBUG
+void
+dump_buffer(lm_ctx_t const *const lm_ctx, char const *const prefix,
+            unsigned char const *const buf, const uint32_t count);
+#endif
+
+#endif /* LIB_MUSER_H */
+
+/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/lib/pci.h b/lib/pci.h
new file mode 100644
index 0000000..4b7132a
--- /dev/null
+++ b/lib/pci.h
@@ -0,0 +1,276 @@
+/*
+ * Copyright (c) 2019 Nutanix Inc. All rights reserved.
+ *
+ * Authors: Thanos Makatos <thanos@nutanix.com>
+ *          Swapnil Ingle <swapnil.ingle@nutanix.com>
+ *          Felipe Franciosi <felipe@nutanix.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Nutanix nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + */ + +#ifndef LIBMUSER_PCI_H +#define LIBMUSER_PCI_H + +#include <stdint.h> +#include <stdbool.h> + +#include <linux/pci_regs.h> + +struct lm_ctx; +typedef struct lm_ctx lm_ctx_t; + +typedef uint64_t dma_addr_t; + +typedef struct { + int region; + int length; + uint64_t offset; +} dma_scattergather_t; + +typedef struct lm_ctx lm_ctx_t; +typedef struct lm_reg_info lm_reg_info_t; +typedef struct lm_pci_config_space lm_pci_config_space_t; + +typedef enum { + LM_ERR, + LM_INF, + LM_DBG +} lm_log_lvl_t; + +#define PCI_CONFIG_SPACE_SIZEOF 0x100 +#define PCI_EXTENDED_CONFIG_SPACE_SIZEOF 0x1000 + +enum { + LM_DEV_BAR0_REG_IDX, + LM_DEV_BAR1_REG_IDX, + LM_DEV_BAR2_REG_IDX, + LM_DEV_BAR3_REG_IDX, + LM_DEV_BAR4_REG_IDX, + LM_DEV_BAR5_REG_IDX, + LM_DEV_ROM_REG_IDX, + LM_DEV_CFG_REG_IDX, + LM_DEV_VGA_REG_IDX, + LM_DEV_NUM_REGS = 9 +}; + +/* + * TODO lots of the sizes of each member are defined in pci_regs.h, use those + * instead? 
+ */ + +typedef union { + uint32_t raw; + struct { + uint16_t vid; + uint16_t sid; + } __attribute__ ((packed)); +} __attribute__ ((packed)) lm_pci_hdr_ss_t; +_Static_assert(sizeof(lm_pci_hdr_ss_t) == 0x4, "bad SS size"); + +typedef union { + uint8_t raw; +} __attribute__ ((packed)) lm_pci_hdr_bist_t; +_Static_assert(sizeof(lm_pci_hdr_bist_t) == 0x1, "bad BIST size"); + +typedef union { + uint32_t raw; + union { + struct { + unsigned int region_type:1; + unsigned int locatable:2; + unsigned int prefetchable:1; + unsigned int base_address:28; + } __attribute__ ((packed)) mem; + struct { + unsigned int region_type:1; + unsigned int reserved:1; + unsigned int base_address:30; + } __attribute__ ((packed)) io; + } __attribute__ ((packed)); +} __attribute__ ((packed)) lm_bar_t; +_Static_assert(sizeof(lm_bar_t) == 0x4, "bad BAR size"); + +typedef union { + uint8_t raw; +} __attribute__ ((packed)) lm_pci_hdr_htype_t; +_Static_assert(sizeof(lm_pci_hdr_htype_t) == 0x1, "bad HTYPE size"); + +typedef union { + uint8_t raw[3]; + struct { + uint8_t pi; + uint8_t scc; + uint8_t bcc; + } __attribute__ ((packed)); +} __attribute__ ((packed)) lm_pci_hdr_cc_t; +_Static_assert(sizeof(lm_pci_hdr_cc_t) == 0x3, "bad CC size"); + +/* device status */ +typedef union { + uint16_t raw; + struct { + unsigned int res1:3; + unsigned int is:1; + unsigned int cl:1; + unsigned int c66:1; + unsigned int res2:1; + unsigned int fbc:1; + unsigned int dpd:1; + unsigned int devt:2; + unsigned int sta:1; + unsigned int rta:1; + unsigned int rma:1; + unsigned int sse:1; + unsigned int dpe:1; + } __attribute__ ((packed)); +} __attribute__ ((packed)) lm_pci_hdr_sts_t; +_Static_assert(sizeof(lm_pci_hdr_sts_t) == 0x2, "bad STS size"); + +typedef union { + uint16_t raw; + struct { + uint8_t iose:1; + uint8_t mse:1; + uint8_t bme:1; + uint8_t sce:1; + uint8_t mwie:1; + uint8_t vga:1; + uint8_t pee:1; + uint8_t zero:1; + uint8_t see:1; + uint8_t fbe:1; + uint8_t id:1; + uint8_t res1:5; + } __attribute__ 
((packed)); +} __attribute__ ((packed)) lm_pci_hdr_cmd_t; +_Static_assert(sizeof(lm_pci_hdr_cmd_t) == 0x2, "bad CMD size"); + +typedef union { + uint32_t raw; + struct { + uint16_t vid; + uint16_t did; + } __attribute__ ((packed)); +} __attribute__ ((packed)) lm_pci_hdr_id_t; +_Static_assert(sizeof(lm_pci_hdr_id_t) == 0x4, "bad ID size"); + +typedef union { + uint16_t raw; + struct { + uint8_t iline; + uint8_t ipin; + } __attribute__ ((packed)); +} __attribute__ ((packed)) lm_pci_hdr_intr_t; +_Static_assert(sizeof(lm_pci_hdr_intr_t) == 0x2, "bad INTR size"); + +typedef union { + uint8_t raw[PCI_STD_HEADER_SIZEOF]; + struct { + lm_pci_hdr_id_t id; + lm_pci_hdr_cmd_t cmd; + lm_pci_hdr_sts_t sts; + uint8_t rid; + lm_pci_hdr_cc_t cc; + uint8_t cls; + uint8_t mlt; + lm_pci_hdr_htype_t htype; + lm_pci_hdr_bist_t bist; +#define PCI_BARS_NR 6 + lm_bar_t bars[PCI_BARS_NR]; + uint32_t ccptr; + lm_pci_hdr_ss_t ss; + uint32_t erom; + uint8_t cap; + uint8_t res1[7]; + lm_pci_hdr_intr_t intr; + uint8_t mgnt; + uint8_t mlat; + } __attribute__ ((packed)); +} __attribute__ ((packed)) lm_pci_hdr_t; +_Static_assert(sizeof(lm_pci_hdr_t) == 0x40, "bad PCI header size"); + +typedef struct { + uint8_t raw[PCI_CONFIG_SPACE_SIZEOF - PCI_STD_HEADER_SIZEOF]; +} __attribute__ ((packed)) lm_pci_non_std_config_space_t; +_Static_assert(sizeof(lm_pci_non_std_config_space_t) == 0xc0, + "bad non-standard PCI configuration space size"); + +struct lm_pci_config_space { + union { + uint8_t raw[PCI_CONFIG_SPACE_SIZEOF]; + struct { + lm_pci_hdr_t hdr; + lm_pci_non_std_config_space_t non_std; + } __attribute__ ((packed)); + } __attribute__ ((packed)); + uint8_t extended[]; +} __attribute__ ((packed)); +_Static_assert(sizeof(struct lm_pci_config_space) == 0x100, + "bad PCI configuration space size"); + +// Region flags. +#define LM_REG_FLAG_READ (1 << 0) +#define LM_REG_FLAG_WRITE (1 << 1) +#define LM_REG_FLAG_MMAP (1 << 2) // TODO: how this relates to IO bar? 
+#define LM_REG_FLAG_RW (LM_REG_FLAG_READ | LM_REG_FLAG_WRITE) +#define LM_REG_FLAG_MEM (1 << 3) // if unset, bar is IO + +struct lm_reg_info { + uint32_t flags; + uint32_t size; + uint64_t offset; +}; + +enum { + LM_DEV_INTX_IRQ_IDX, + LM_DEV_MSI_IRQ_IDX, + LM_DEV_MSIX_IRQ_IDX, + LM_DEV_ERR_IRQ_IDX, + LM_DEV_REQ_IRQ_IDX, + LM_DEV_NUM_IRQS = 5 +}; + +/* + * Returns a pointer to the non-standard part of the PCI configuration space. + */ +lm_pci_config_space_t *lm_get_pci_config_space(lm_ctx_t * const lm_ctx); + +lm_reg_info_t *lm_get_region_info(lm_ctx_t * const lm_ctx); + +/* + * TODO the rest of these functions don't need to be public, put them in a + * private header file so libmuser.c can use them. + * TODO replace the "muser" prefix + */ +int +muser_pci_hdr_access(lm_ctx_t * const lm_ctx, size_t * const count, + loff_t * const pos, const bool write, + unsigned char *const buf); + + + +#endif /* LIBMUSER_PCI_H */ + +/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lib/pmcap.h b/lib/pmcap.h new file mode 100644 index 0000000..2757a3e --- /dev/null +++ b/lib/pmcap.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2019 Nutanix Inc. All rights reserved. + * + * Authors: Thanos Makatos <thanos@nutanix.com> + * Swapnil Ingle <swapnil.ingle@nutanix.com> + * Felipe Franciosi <felipe@nutanix.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Nutanix nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + */ + +struct pid { + unsigned int cid:8; + unsigned int next:8; +} __attribute__((packed)); +_Static_assert(sizeof(struct pid) == 0x2, "bad PID size"); + +struct pc { + unsigned int vs:3; + unsigned int pmec:1; + unsigned int res:1; + unsigned int dsi:1; + unsigned int auxc:3; + unsigned int d1s:1; + unsigned int d2s:1; + unsigned int psup:5; +} __attribute__((packed)); +_Static_assert(sizeof(struct pc) == 0x2, "bad PC size"); + +struct pmcs { + unsigned int ps:2; + unsigned int res1:1; + unsigned int nsfrst:1; + unsigned int res2:4; + unsigned int pmee:1; + unsigned int dse:4; + unsigned int dsc:2; + unsigned int pmes:1; +}; +_Static_assert(sizeof(struct pc) == 0x2, "bad PC size"); + +struct pmcap { + struct pid pid; + struct pc pc; + struct pmcs pmcs; +} __attribute__((packed)) __attribute__ ((aligned(8))); +_Static_assert(sizeof(struct pmcap) == 0x8, "bad PC size"); + +/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lib/pxcap.h b/lib/pxcap.h new file mode 100644 index 
0000000..fbea685 --- /dev/null +++ b/lib/pxcap.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2019 Nutanix Inc. All rights reserved. + * + * Authors: Thanos Makatos <thanos@nutanix.com> + * Swapnil Ingle <swapnil.ingle@nutanix.com> + * Felipe Franciosi <felipe@nutanix.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Nutanix nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. 
+ * + */ + +struct pxid { + unsigned int cid:8; + unsigned int next:8; +} __attribute__((packed)); +_Static_assert(sizeof(struct pxid) == 0x2, "bad PXID size"); + +struct pxcap { + unsigned int ver:4; + unsigned int dpt:4; + unsigned int si:1; + unsigned int imn:5; + unsigned int res1:2; +} __attribute__((packed)); +_Static_assert(sizeof(struct pxcap) == 0x2, "bad PXCAP size"); + +struct pxdcap { + unsigned int mps:3; + unsigned int pfs:2; + unsigned int etfs:1; + unsigned int l0sl:3; + unsigned int l1l:3; + unsigned int per:1; + unsigned int res1:2; + unsigned int csplv:8; + unsigned int cspls:2; + unsigned int flrc:1; + unsigned int res2:3; +} __attribute__((packed)); +_Static_assert(sizeof(struct pxdcap) == 0x4, "bad PXDCAP size"); + +union pxdc { + uint16_t raw; + struct { + unsigned int cere:1; + unsigned int nfere:1; + unsigned int fere:1; + unsigned int urre:1; + unsigned int ero:1; + unsigned int mps:3; + unsigned int ete:1; + unsigned int pfe:1; + unsigned int appme:1; + unsigned int ens:1; + unsigned int mrrs:3; + unsigned int iflr:1; + } __attribute__((packed)); +} __attribute__((packed)); +_Static_assert(sizeof(union pxdc) == 0x2, "bad PXDC size"); + +/* TODO not defining for now since all values are 0 for reset */ +struct pxds { + unsigned int stuff:16; +} __attribute__((packed)); +_Static_assert(sizeof(struct pxds) == 0x2, "bad PXDS size"); + +struct pxlcap { + unsigned int stuff:32; +} __attribute__((packed)); +_Static_assert(sizeof(struct pxlcap) == 0x4, "bad PXLCAP size"); + +struct pxlc { + unsigned int stuff:16; +} __attribute__((packed)); +_Static_assert(sizeof(struct pxlc) == 0x2, "bad PXLC size"); + +struct pxls { + unsigned int stuff:16; +} __attribute__((packed)); +_Static_assert(sizeof(struct pxls) == 0x2, "bad PXLS size"); + +struct pxdcap2 { + unsigned int ctrs:4; + unsigned int ctds:1; + unsigned int arifs:1; + unsigned int aors:1; + unsigned int aocs32:1; + unsigned int aocs64:1; + unsigned int ccs128:1; + unsigned int nprpr:1; + 
unsigned int ltrs:1; + unsigned int tphcs:2; + unsigned int obffs:2; + unsigned int effs:1; + unsigned int eetps:1; + unsigned int meetp:2; + unsigned int res1:8; +} __attribute__((packed)); +_Static_assert(sizeof(struct pxdcap2) == 0x4, "bad PXDCAP2 size"); + +struct pxdc2 { + unsigned int stuff:16; +} __attribute__((packed)); +_Static_assert(sizeof(struct pxdc2) == 0x2, "bad PXDC2 size"); + +/* TODO name conflicts with PXCAP */ +struct PCI_Express_Capability { + struct pxid pxid; + struct pxcap pxcap; + struct pxdcap pxdcap; + union pxdc pxdc; + struct pxds pxds; + struct pxlcap pxlcap; + struct pxlc pxlc; + struct pxls pxls; + uint8_t pad[0x10]; + struct pxdcap2 pxdcap2; + struct pxdc2 pxdc2; +} __attribute__((packed)); +_Static_assert(sizeof(struct PCI_Express_Capability) == 0x2a, + "bad PCI Express Capability size"); + +/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ |