/* * Copyright (c) 2021 Nutanix Inc. All rights reserved. * * Authors: Thanos Makatos * Swapnil Ingle * Felipe Franciosi * John Levon * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Nutanix nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * */ /* * Capability handling. We handle reads and writes to standard capabilities * ourselves, and optionally for vendor capabilities too. 
For each access (via * pci_config_space_access() -> pci_cap_access()), if we find that we're * reading from a particular capability offset: * * - if VFU_CAP_FLAG_CALLBACK is set, we call the config space region callback * given by the user * - else we memcpy() the capability data back out to the client * * For writes: * * - if VFU_CAP_FLAG_READONLY is set, we fail the write * - if VFU_CAP_FLAG_CALLBACK is set, we call the config space region callback * given by the user * - else we call the cap-specific callback to handle the write. * * Extended capabilities live in extended space (after the first 256 bytes), so * can never clash with a standard capability. An empty capability list is * signalled by a zeroed header at offset 256 (which the config space has by * default). */ #include #include #include #include #include #include #include "common.h" #include "libvfio-user.h" #include "pci_caps.h" #include "pci.h" #include "private.h" /* All capabilities must be dword-aligned. */ #define CAP_ROUND (4) static void * cap_data(vfu_ctx_t *vfu_ctx, struct pci_cap *cap) { return (void *)pci_config_space_ptr(vfu_ctx, cap->off); } static size_t cap_size(vfu_ctx_t *vfu_ctx, void *data, bool extended) { if (extended) { uint16_t id = ((struct pcie_ext_cap_hdr *)data)->id; switch (id) { case PCI_EXT_CAP_ID_DSN: return PCI_EXT_CAP_DSN_SIZEOF; case PCI_EXT_CAP_ID_VNDR: return ((struct pcie_ext_cap_vsc_hdr *)data)->len; default: vfu_log(vfu_ctx, LOG_ERR, "invalid cap id %u", id); abort(); } } else { uint8_t id = ((struct cap_hdr *)data)->id; switch (id) { case PCI_CAP_ID_PM: return PCI_PM_SIZEOF; case PCI_CAP_ID_EXP: return VFIO_USER_PCI_CAP_EXP_SIZEOF; case PCI_CAP_ID_MSI: return VFIO_USER_PCI_CAP_MSI_SIZEOF; case PCI_CAP_ID_MSIX: return PCI_CAP_MSIX_SIZEOF; case PCI_CAP_ID_VNDR: return ((struct vsc *)data)->size; default: vfu_log(vfu_ctx, LOG_ERR, "invalid cap id %u", id); abort(); } } } static ssize_t handle_pmcs_write(vfu_ctx_t *vfu_ctx, struct pmcap *pm, const struct pmcs *const 
pmcs) { if (pm->pmcs.ps != pmcs->ps) { vfu_log(vfu_ctx, LOG_DEBUG, "power state set to %#x", pmcs->ps); } if (pm->pmcs.pmee != pmcs->pmee) { vfu_log(vfu_ctx, LOG_DEBUG, "PME enable set to %#x", pmcs->pmee); } if (pm->pmcs.dse != pmcs->dse) { vfu_log(vfu_ctx, LOG_DEBUG, "data select set to %#x", pmcs->dse); } if (pm->pmcs.pmes != pmcs->pmes) { vfu_log(vfu_ctx, LOG_DEBUG, "PME status set to %#x", pmcs->pmes); } pm->pmcs = *pmcs; return 0; } static ssize_t cap_write_pm(vfu_ctx_t *vfu_ctx, struct pci_cap *cap, char * buf, size_t count, loff_t offset) { struct pmcap *pm = cap_data(vfu_ctx, cap); switch (offset - cap->off) { case offsetof(struct pmcap, pc): if (count != sizeof(struct pc)) { return ERROR_INT(EINVAL); } vfu_log(vfu_ctx, LOG_ERR, "FIXME: write to pmcap::pc unimplemented"); return ERROR_INT(ENOTSUP); case offsetof(struct pmcap, pmcs): if (count != sizeof(struct pmcs)) { return ERROR_INT(EINVAL); } handle_pmcs_write(vfu_ctx, pm, (struct pmcs *)buf); return sizeof(struct pmcs); case offsetof(struct pmcap, pmcsr_bse): if (count != 1) { return ERROR_INT(EINVAL); } vfu_log(vfu_ctx, LOG_ERR, "FIXME: write to pmcap::pmcsr_bse unimplemented"); return ERROR_INT(ENOTSUP); case offsetof(struct pmcap, data): if (count != 1) { return ERROR_INT(EINVAL); } vfu_log(vfu_ctx, LOG_ERR, "FIXME: write to pmcap::data unimplemented"); return ERROR_INT(ENOTSUP); } return ERROR_INT(EINVAL); } static ssize_t cap_write_msi(vfu_ctx_t *vfu_ctx, struct pci_cap *cap, char *buf, size_t count, loff_t offset) { struct msicap *msi = cap_data(vfu_ctx, cap); struct msicap new_msi = *msi; memcpy((char *)&new_msi + offset - cap->off, buf, count); if (msi->mc.msie != new_msi.mc.msie) { msi->mc.msie = new_msi.mc.msie; vfu_log(vfu_ctx, LOG_DEBUG, "%s MSI", msi->mc.msie ? 
"enable" : "disable"); } if (msi->mc.mme != new_msi.mc.mme) { if (new_msi.mc.mme > 5) { vfu_log(vfu_ctx, LOG_ERR, "MSI cannot have more than 32 interrupt vectors"); return ERROR_INT(EINVAL); } if (new_msi.mc.mme > msi->mc.mmc) { vfu_log(vfu_ctx, LOG_ERR, "MSI cannot have more interrupt vectors" " in MME than defined in MMC"); return ERROR_INT(EINVAL); } msi->mc.mme = new_msi.mc.mme; vfu_log(vfu_ctx, LOG_DEBUG, "MSI Updated Multiple Message Enable count"); } if (msi->ma.addr != new_msi.ma.addr) { msi->ma.addr = new_msi.ma.addr; vfu_log(vfu_ctx, LOG_DEBUG, "MSI Message Address set to %x", msi->ma.addr << 2); } if (msi->mua != new_msi.mua) { msi->mua = new_msi.mua; vfu_log(vfu_ctx, LOG_DEBUG, "MSI Message Upper Address set to %x", msi->mua); } if (msi->md != new_msi.md) { msi->md = new_msi.md; vfu_log(vfu_ctx, LOG_DEBUG, "MSI Message Data set to %x", msi->md); } if (msi->mmask != new_msi.mmask) { msi->mmask = new_msi.mmask; vfu_log(vfu_ctx, LOG_DEBUG, "MSI Mask Bits set to %x", msi->mmask); } return count; } static ssize_t cap_write_msix(vfu_ctx_t *vfu_ctx, struct pci_cap *cap, char *buf, size_t count, loff_t offset) { struct msixcap *msix = cap_data(vfu_ctx, cap); struct msixcap new_msix = *msix; memcpy((char *)&new_msix + offset - cap->off, buf, count); /* * Same as doing &= (PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE), but * prefer to log what's changing. */ if (msix->mxc.fm != new_msix.mxc.fm) { msix->mxc.fm = new_msix.mxc.fm; if (msix->mxc.fm) { vfu_log(vfu_ctx, LOG_DEBUG, "all MSI-X vectors masked"); } else { vfu_log(vfu_ctx, LOG_DEBUG, "vector's mask bit determines whether vector is masked"); } } if (msix->mxc.mxe != new_msix.mxc.mxe) { msix->mxc.mxe = new_msix.mxc.mxe; vfu_log(vfu_ctx, LOG_DEBUG, "%s MSI-X", msix->mxc.mxe ? 
"enable" : "disable"); } return count; } static int handle_px_pxdc_write(vfu_ctx_t *vfu_ctx, struct pxcap *px, const union pxdc *const p) { assert(px != NULL); assert(p != NULL); if (p->cere != px->pxdc.cere) { px->pxdc.cere = p->cere; vfu_log(vfu_ctx, LOG_DEBUG, "CERE %s", p->cere ? "enable" : "disable"); } if (p->nfere != px->pxdc.nfere) { px->pxdc.nfere = p->nfere; vfu_log(vfu_ctx, LOG_DEBUG, "NFERE %s", p->nfere ? "enable" : "disable"); } if (p->fere != px->pxdc.fere) { px->pxdc.fere = p->fere; vfu_log(vfu_ctx, LOG_DEBUG, "FERE %s", p->fere ? "enable" : "disable"); } if (p->urre != px->pxdc.urre) { px->pxdc.urre = p->urre; vfu_log(vfu_ctx, LOG_DEBUG, "URRE %s", p->urre ? "enable" : "disable"); } if (p->ero != px->pxdc.ero) { px->pxdc.ero = p->ero; vfu_log(vfu_ctx, LOG_DEBUG, "ERO %s", p->ero ? "enable" : "disable"); } if (p->mps != px->pxdc.mps) { px->pxdc.mps = p->mps; vfu_log(vfu_ctx, LOG_DEBUG, "MPS set to %d", p->mps); } if (p->ete != px->pxdc.ete) { px->pxdc.ete = p->ete; vfu_log(vfu_ctx, LOG_DEBUG, "ETE %s", p->ete ? "enable" : "disable"); } if (p->pfe != px->pxdc.pfe) { px->pxdc.pfe = p->pfe; vfu_log(vfu_ctx, LOG_DEBUG, "PFE %s", p->pfe ? "enable" : "disable"); } if (p->appme != px->pxdc.appme) { px->pxdc.appme = p->appme; vfu_log(vfu_ctx, LOG_DEBUG, "APPME %s", p->appme ? "enable" : "disable"); } if (p->ens != px->pxdc.ens) { px->pxdc.ens = p->ens; vfu_log(vfu_ctx, LOG_DEBUG, "ENS %s", p->ens ? 
"enable" : "disable"); } if (p->mrrs != px->pxdc.mrrs) { px->pxdc.mrrs = p->mrrs; vfu_log(vfu_ctx, LOG_DEBUG, "MRRS set to %d", p->mrrs); } if (p->iflr) { if (px->pxdcap.flrc == 0) { vfu_log(vfu_ctx, LOG_ERR, "FLR capability is not supported"); return ERROR_INT(EINVAL); } if (vfu_ctx->reset != NULL) { vfu_log(vfu_ctx, LOG_DEBUG, "initiate function level reset"); return call_reset_cb(vfu_ctx, VFU_RESET_PCI_FLR); } else { vfu_log(vfu_ctx, LOG_ERR, "FLR callback is not implemented"); } } return 0; } /* TODO implement */ static int handle_px_pxlc_write(vfu_ctx_t *vfu_ctx UNUSED, struct pxcap *px UNUSED, const union pxlc *const p UNUSED) { return 0; } /* TODO implement */ static int handle_px_pxsc_write(vfu_ctx_t *vfu_ctx UNUSED, struct pxcap *px UNUSED, const struct pxsc *const p UNUSED) { return 0; } /* TODO implement */ static int handle_px_pxrc_write(vfu_ctx_t *vfu_ctx UNUSED, struct pxcap *px UNUSED, const struct pxrc *const p UNUSED) { return 0; } static int handle_px_pxdc2_write(vfu_ctx_t *vfu_ctx, struct pxcap *px, const union pxdc2 *const p) { assert(px != NULL); assert(p != NULL); if (p->raw != px->pxdc2.raw) { vfu_log(vfu_ctx, LOG_DEBUG, "Device Control 2 set to %#x", p->raw); } px->pxdc2 = *p; return 0; } static int handle_px_pxlc2_write(vfu_ctx_t *vfu_ctx, struct pxcap *px, const struct pxlc2 *const p) { assert(px != NULL); assert(p != NULL); if (p->stuff != px->pxlc2.stuff) { vfu_log(vfu_ctx, LOG_DEBUG, "Link Control 2 set to %#x", p->stuff); } px->pxlc2 = *p; return 0; } static int handle_px_write_2_bytes(vfu_ctx_t *vfu_ctx, struct pxcap *px, char *buf, loff_t off) { switch (off) { case offsetof(struct pxcap, pxdc): return handle_px_pxdc_write(vfu_ctx, px, (union pxdc *)buf); case offsetof(struct pxcap, pxlc): return handle_px_pxlc_write(vfu_ctx, px, (union pxlc *)buf); case offsetof(struct pxcap, pxsc): return handle_px_pxsc_write(vfu_ctx, px, (struct pxsc *)buf); case offsetof(struct pxcap, pxrc): return handle_px_pxrc_write(vfu_ctx, px, (struct pxrc 
*)buf); case offsetof(struct pxcap, pxdc2): return handle_px_pxdc2_write(vfu_ctx, px, (union pxdc2 *)buf); case offsetof(struct pxcap, pxlc2): return handle_px_pxlc2_write(vfu_ctx, px, (struct pxlc2 *)buf); case offsetof(struct pxcap, pxsc2): /* RsvdZ */ return 0; } return ERROR_INT(EINVAL); } static ssize_t cap_write_px(vfu_ctx_t *vfu_ctx, struct pci_cap *cap, char *buf, size_t count, loff_t offset) { struct pxcap *px = cap_data(vfu_ctx, cap); int err; switch (count) { case 2: err = handle_px_write_2_bytes(vfu_ctx, px, buf, offset - cap->off); break; default: err = ERROR_INT(EINVAL); break; } if (err != 0) { return err; } return count; } static ssize_t cap_write_vendor(vfu_ctx_t *vfu_ctx, struct pci_cap *cap UNUSED, char *buf, size_t count, loff_t offset) { memcpy(pci_config_space_ptr(vfu_ctx, offset), buf, count); return count; } static ssize_t ext_cap_write_dsn(vfu_ctx_t *vfu_ctx, struct pci_cap *cap, char *buf UNUSED, size_t count UNUSED, loff_t offset UNUSED) { vfu_log(vfu_ctx, LOG_ERR, "%s capability is read-only", cap->name); return ERROR_INT(EPERM); } static ssize_t ext_cap_write_vendor(vfu_ctx_t *vfu_ctx, struct pci_cap *cap UNUSED, char *buf, size_t count, loff_t offset) { memcpy(pci_config_space_ptr(vfu_ctx, offset), buf, count); return count; } static bool ranges_intersect(size_t off1, size_t size1, size_t off2, size_t size2) { return (off1 < (off2 + size2) && (off1 + size1) >= off2); } struct pci_cap * cap_find_by_offset(vfu_ctx_t *vfu_ctx, loff_t offset, size_t count) { size_t i; for (i = 0; i < vfu_ctx->pci.nr_caps; i++) { struct pci_cap *cap = &vfu_ctx->pci.caps[i]; if (ranges_intersect(offset, count, cap->off, cap->size)) { return cap; } } for (i = 0; i < vfu_ctx->pci.nr_ext_caps; i++) { struct pci_cap *cap = &vfu_ctx->pci.ext_caps[i]; if (ranges_intersect(offset, count, cap->off, cap->size)) { return cap; } } return NULL; } ssize_t pci_cap_access(vfu_ctx_t *vfu_ctx, char *buf, size_t count, loff_t offset, bool is_write) { struct pci_cap *cap = 
cap_find_by_offset(vfu_ctx, offset, count); assert(cap != NULL); assert((size_t)offset >= cap->off); assert(count <= cap->size); if (is_write && (cap->flags & VFU_CAP_FLAG_READONLY)) { vfu_log(vfu_ctx, LOG_ERR, "write of %zu bytes to read-only capability " "%u (%s)", count, cap->id, cap->name); return ERROR_INT(EPERM); } if (cap->flags & VFU_CAP_FLAG_CALLBACK) { return pci_nonstd_access(vfu_ctx, buf, count, offset, is_write); } if (!is_write) { memcpy(buf, pci_config_space_ptr(vfu_ctx, offset), count); return count; } if (offset - cap->off < cap->hdr_size) { vfu_log(vfu_ctx, LOG_ERR, "disallowed write to header for cap %d (%s)", cap->id, cap->name); return ERROR_INT(EPERM); } return cap->cb(vfu_ctx, cap, buf, count, offset); } /* * Place the new capability after the previous (or after the standard header if * this is the first capability). * * If cap->off is already provided, place it directly, but first check it * doesn't overlap an existing capability, or the PCI header. We still also need * to link it into the list. There's no guarantee that the list is ordered by * offset after doing so. 
*/ static int cap_place(vfu_ctx_t *vfu_ctx, struct pci_cap *cap, void *data) { vfu_pci_config_space_t *config_space; uint8_t *prevp = NULL; size_t offset; config_space = vfu_pci_get_config_space(vfu_ctx); prevp = &config_space->hdr.cap; if (cap->off != 0) { if (cap->off < PCI_STD_HEADER_SIZEOF) { vfu_log(vfu_ctx, LOG_ERR, "invalid offset %zx for capability " "%u (%s)", cap->off, cap->id, cap->name); return ERROR_INT(EINVAL); } if (cap_find_by_offset(vfu_ctx, cap->off, cap->size) != NULL) { vfu_log(vfu_ctx, LOG_ERR, "overlap found for capability " "%u (%s)", cap->id, cap->name); return ERROR_INT(EINVAL); } while (*prevp != 0) { prevp = pci_config_space_ptr(vfu_ctx, *prevp + PCI_CAP_LIST_NEXT); } } else if (*prevp == 0) { cap->off = PCI_STD_HEADER_SIZEOF; } else { for (offset = *prevp; offset != 0; offset = *prevp) { size_t size; prevp = pci_config_space_ptr(vfu_ctx, offset + PCI_CAP_LIST_NEXT); if (*prevp == 0) { size = cap_size(vfu_ctx, pci_config_space_ptr(vfu_ctx, offset), false); cap->off = ROUND_UP(offset + size, 4); break; } } } if (cap->off + cap->size > pci_config_space_size(vfu_ctx)) { vfu_log(vfu_ctx, LOG_ERR, "no config space left for capability " "%u (%s) of size %zu bytes at offset %zx", cap->id, cap->name, cap->size, cap->off); return ERROR_INT(ENOSPC); } memcpy(cap_data(vfu_ctx, cap), data, cap->size); /* Make sure the previous cap's PCI_CAP_LIST_NEXT points to us. */ *prevp = cap->off; /* Make sure our PCI_CAP_LIST_NEXT is zeroed. */ *pci_config_space_ptr(vfu_ctx, cap->off + PCI_CAP_LIST_NEXT) = 0; return 0; } /* * Place the new extended capability after the previous (or at the beginning of * extended config space, replacing the initial zeroed capability). * * If cap->off is already provided, place it directly, but first check it * doesn't overlap an existing extended capability, and that the first one * replaces the initial zeroed capability. We also still need to link it into * the list. 
 */
static int
ext_cap_place(vfu_ctx_t *vfu_ctx, struct pci_cap *cap, void *data)
{
    struct pcie_ext_cap_hdr *hdr = NULL;

    /* Extended list starts at offset 256 (PCI_CFG_SPACE_SIZE). */
    hdr = (void *)pci_config_space_ptr(vfu_ctx, PCI_CFG_SPACE_SIZE);

    if (cap->off != 0) {
        /* Caller-specified offset: validate, then walk to the list tail. */
        if (cap->off < PCI_CFG_SPACE_SIZE) {
            vfu_log(vfu_ctx, LOG_ERR, "invalid offset %zx for capability "
                    "%u (%s)", cap->off, cap->id, cap->name);
            return ERROR_INT(EINVAL);
        }

        if (cap_find_by_offset(vfu_ctx, cap->off, cap->size) != NULL) {
            vfu_log(vfu_ctx, LOG_ERR, "overlap found for capability "
                    "%u (%s)", cap->id, cap->name);
            return ERROR_INT(EINVAL);
        }

        /* A zeroed header at 256 means the list is still empty. */
        if (hdr->id == 0x0 && cap->off != PCI_CFG_SPACE_SIZE) {
            vfu_log(vfu_ctx, LOG_ERR, "first extended capability must be at "
                    "%#x", PCI_CFG_SPACE_SIZE);
            return ERROR_INT(EINVAL);
        }

        while (hdr->next != 0) {
            hdr = (void *)pci_config_space_ptr(vfu_ctx, hdr->next);
        }
    } else if (hdr->id == 0x0) {
        /*
         * Empty list: overwrite the zeroed header in place. hdr is set to
         * NULL so no predecessor link is updated below.
         */
        hdr = NULL;
        cap->off = PCI_CFG_SPACE_SIZE;
    } else {
        /* Walk to the last capability, then place ours just past it. */
        while (hdr->next != 0) {
            hdr = (void *)pci_config_space_ptr(vfu_ctx, hdr->next);
        }

        cap->off = ROUND_UP((uint8_t *)hdr + cap_size(vfu_ctx, hdr, true) -
                            pci_config_space_ptr(vfu_ctx, 0), CAP_ROUND);
    }

    if (cap->off + cap->size > pci_config_space_size(vfu_ctx)) {
        vfu_log(vfu_ctx, LOG_ERR, "no config space left for capability "
                "%u (%s) of size %zu bytes at offset %zu", cap->id,
                cap->name, cap->size, cap->off);
        return ERROR_INT(ENOSPC);
    }

    memcpy(cap_data(vfu_ctx, cap), data, cap->size);

    /* Make sure the previous cap's next points to us. */
    if (hdr != NULL) {
        assert((cap->off & 0x3) == 0);
        hdr->next = cap->off;
    }

    /* Terminate the list at the newly placed capability. */
    hdr = (void *)pci_config_space_ptr(vfu_ctx, cap->off);
    hdr->next = 0;
    return 0;
}

/*
 * Add a standard or extended (VFU_CAP_FLAG_EXTENDED) capability to the
 * device's config space at @pos (0 lets the library pick the placement).
 * @data must start with the capability header, from which the ID and size
 * are taken. Returns the offset the capability was placed at, or -1 with
 * errno set on failure.
 */
EXPORT ssize_t
vfu_pci_add_capability(vfu_ctx_t *vfu_ctx, size_t pos, int flags, void *data)
{
    bool extended = (flags & VFU_CAP_FLAG_EXTENDED);
    struct pci_cap cap = { 0 };
    int ret;

    assert(vfu_ctx != NULL);

    if (flags & ~(VFU_CAP_FLAG_EXTENDED | VFU_CAP_FLAG_CALLBACK |
        VFU_CAP_FLAG_READONLY)) {
        vfu_log(vfu_ctx, LOG_DEBUG, "bad flags %#x", flags);
        return ERROR_INT(EINVAL);
    }

    /* VFU_CAP_FLAG_CALLBACK requires a config region callback to exist. */
    if ((flags & VFU_CAP_FLAG_CALLBACK) &&
        vfu_ctx->reg_info[VFU_PCI_DEV_CFG_REGION_IDX].cb == NULL) {
        vfu_log(vfu_ctx, LOG_DEBUG, "no callback");
        return ERROR_INT(EINVAL);
    }

    cap.off = pos;
    cap.flags = flags;
    cap.extended = extended;

    if (extended) {
        /* Extended capabilities only exist on PCI-X 2.0 / PCI Express. */
        switch (vfu_ctx->pci.type) {
        case VFU_PCI_TYPE_PCI_X_2:
        case VFU_PCI_TYPE_EXPRESS:
            break;
        default:
            vfu_log(vfu_ctx, LOG_DEBUG, "bad PCI type %#x",
                    vfu_ctx->pci.type);
            return ERROR_INT(EINVAL);
        }

        if (vfu_ctx->pci.nr_ext_caps == VFU_MAX_CAPS) {
            return ERROR_INT(ENOSPC);
        }

        cap.id = ((struct pcie_ext_cap_hdr *)data)->id;
        cap.hdr_size = sizeof(struct pcie_ext_cap_hdr);

        switch (cap.id) {
        case PCI_EXT_CAP_ID_DSN:
            cap.name = "Device Serial Number";
            cap.cb = ext_cap_write_dsn;
            break;
        case PCI_EXT_CAP_ID_VNDR:
            cap.name = "Vendor-Specific";
            cap.cb = ext_cap_write_vendor;
            cap.hdr_size = sizeof(struct pcie_ext_cap_vsc_hdr);
            break;
        default:
            vfu_log(vfu_ctx, LOG_ERR, "unsupported capability %#x", cap.id);
            return ERROR_INT(ENOTSUP);
        }

        cap.size = cap_size(vfu_ctx, data, extended);

        if (cap.off + cap.size >= pci_config_space_size(vfu_ctx)) {
            vfu_log(vfu_ctx, LOG_DEBUG, "bad PCIe capability offset");
            return ERROR_INT(EINVAL);
        }

        ret = ext_cap_place(vfu_ctx, &cap, data);
    } else {
        if (vfu_ctx->pci.nr_caps == VFU_MAX_CAPS) {
            return ERROR_INT(ENOSPC);
        }

        cap.id = ((struct cap_hdr *)data)->id;
        cap.hdr_size = sizeof(struct cap_hdr);

        switch (cap.id) {
        case PCI_CAP_ID_PM:
            cap.name = "Power Management";
            cap.cb = cap_write_pm;
            break;
        case PCI_CAP_ID_EXP:
            cap.name = "PCI Express";
            cap.cb = cap_write_px;
            break;
        case PCI_CAP_ID_MSI:
            cap.name = "MSI";
            cap.cb = cap_write_msi;
            break;
        case PCI_CAP_ID_MSIX:
            cap.name = "MSI-X";
            cap.cb = cap_write_msix;
            break;
        case PCI_CAP_ID_VNDR:
            cap.name = "Vendor-Specific";
            cap.cb = cap_write_vendor;
            cap.hdr_size = sizeof(struct vsc);
            break;
        default:
            vfu_log(vfu_ctx, LOG_ERR, "unsupported capability %#x", cap.id);
            return ERROR_INT(ENOTSUP);
        }

        cap.size = cap_size(vfu_ctx, data, extended);

        if (cap.off + cap.size >= pci_config_space_size(vfu_ctx)) {
            vfu_log(vfu_ctx, LOG_DEBUG,
                    "PCI capability past end of config space, %zx >= %zx",
                    cap.off + cap.size, pci_config_space_size(vfu_ctx));
            return ERROR_INT(EINVAL);
        }

        ret = cap_place(vfu_ctx, &cap, data);
    }

    if (ret != 0) {
        return ret;
    }

    vfu_log(vfu_ctx, LOG_DEBUG, "added PCI cap \"%s\" size=%#zx offset=%#zx",
            cap.name, cap.size, cap.off);

    /* Record the cap (cap.off was filled in by the place function). */
    if (extended) {
        memcpy(&vfu_ctx->pci.ext_caps[vfu_ctx->pci.nr_ext_caps], &cap,
               sizeof(cap));
        vfu_ctx->pci.nr_ext_caps++;
    } else {
        memcpy(&vfu_ctx->pci.caps[vfu_ctx->pci.nr_caps], &cap, sizeof(cap));
        vfu_ctx->pci.nr_caps++;
    }

    /* Remember where the PCI Express cap landed for access_is_pci_cap_exp(). */
    if (cap.id == PCI_CAP_ID_EXP) {
        vfu_ctx->pci_cap_exp_off = cap.off;
    }

    return cap.off;
}

/*
 * Walk the extended capability list looking for @cap_id, starting after
 * @offset (or from the start of extended space if @offset is 0). Returns
 * the offset of the match, or 0 with errno set (EINVAL on a malformed
 * list/offset, ENOENT if not found).
 */
static size_t
vfu_pci_find_next_ext_capability(vfu_ctx_t *vfu_ctx, size_t offset,
                                 int cap_id)
{
    struct pcie_ext_cap_hdr *hdr = NULL;

    if (offset + sizeof(*hdr) >= pci_config_space_size(vfu_ctx)) {
        errno = EINVAL;
        return 0;
    }

    if (offset == 0) {
        offset = PCI_CFG_SPACE_SIZE;
        hdr = (void *)pci_config_space_ptr(vfu_ctx, offset);
    } else {
        /* Resume the search at the capability after @offset. */
        hdr = (void *)pci_config_space_ptr(vfu_ctx, offset);
        hdr = (void *)pci_config_space_ptr(vfu_ctx, hdr->next);
    }

    for (;;) {
        offset = (uint8_t *)hdr - pci_config_space_ptr(vfu_ctx, 0);

        if (offset + sizeof(*hdr) >= pci_config_space_size(vfu_ctx)) {
            errno = EINVAL;
            return 0;
        }

        if (hdr->id == cap_id) {
            return offset;
        }

        if (hdr->next == 0) {
            break;
        }

        hdr = (void *)pci_config_space_ptr(vfu_ctx, hdr->next);
    }

    errno = ENOENT;
    return 0;
}

/*
 * Public search over the standard (or, with @extended, the extended)
 * capability list. @offset of 0 starts from the list head; otherwise the
 * search resumes after the capability at @offset. Returns the offset of the
 * match, or 0 with errno set (EINVAL/ENOENT as above).
 */
EXPORT size_t
vfu_pci_find_next_capability(vfu_ctx_t *vfu_ctx, bool extended,
                             size_t offset, int cap_id)
{
    assert(vfu_ctx != NULL);

    if (extended) {
        return vfu_pci_find_next_ext_capability(vfu_ctx, offset, cap_id);
    }

    if (offset + PCI_CAP_LIST_NEXT >= pci_config_space_size(vfu_ctx)) {
        errno = EINVAL;
        return 0;
    }

    if (offset == 0) {
        /* Start from the list head in the standard header. */
        offset = vfu_pci_get_config_space(vfu_ctx)->hdr.cap;
    } else {
        /* Resume after the capability at @offset. */
        offset = *pci_config_space_ptr(vfu_ctx, offset + PCI_CAP_LIST_NEXT);
    }

    if (offset == 0) {
        errno = ENOENT;
        return 0;
    }

    for (;;) {
        uint8_t id, next;

        /* Sanity check. */
        if (offset + PCI_CAP_LIST_NEXT >= pci_config_space_size(vfu_ctx)) {
            errno = EINVAL;
            return 0;
        }

        id = *pci_config_space_ptr(vfu_ctx, offset + PCI_CAP_LIST_ID);
        next = *pci_config_space_ptr(vfu_ctx, offset + PCI_CAP_LIST_NEXT);

        if (id == cap_id) {
            return offset;
        }

        offset = next;

        if (offset == 0) {
            errno = ENOENT;
            return 0;
        }
    }
}

/* Convenience wrapper: find the first occurrence of @cap_id. */
EXPORT size_t
vfu_pci_find_capability(vfu_ctx_t *vfu_ctx, bool extended, int cap_id)
{
    return vfu_pci_find_next_capability(vfu_ctx, extended, 0, cap_id);
}

/*
 * True if the access targets the PCI Express capability's Device Control
 * register in the config space region (used to detect FLR-triggering
 * writes).
 */
bool
access_is_pci_cap_exp(const vfu_ctx_t *vfu_ctx, size_t region_index,
                      uint64_t offset)
{
    size_t _offset = vfu_ctx->pci_cap_exp_off + offsetof(struct pxcap, pxdc);

    return region_index == VFU_PCI_DEV_CFG_REGION_IDX && offset == _offset;
}

/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */