/* * Copyright (c) 2019 Nutanix Inc. All rights reserved. * * Authors: Thanos Makatos * Swapnil Ingle * Felipe Franciosi * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Nutanix nor the names of its contributors may be * used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * */ #include #include #include #include #include #include #include "pci_caps.h" #include "common.h" #include "libvfio-user.h" #include "pci.h" #include "private.h" static inline void pci_hdr_write_bar(vfu_ctx_t *vfu_ctx, uint16_t bar_index, const char *buf) { uint32_t cfg_addr; unsigned long mask; vfu_pci_hdr_t *hdr; assert(vfu_ctx != NULL); if (vfu_ctx->reg_info[bar_index].size == 0) { return; } hdr = &vfu_pci_get_config_space(vfu_ctx)->hdr; cfg_addr = *(uint32_t *) buf; vfu_log(vfu_ctx, LOG_DEBUG, "BAR%d addr 0x%x", bar_index, cfg_addr); if (cfg_addr == 0xffffffff) { cfg_addr = ~(vfu_ctx->reg_info[bar_index].size) + 1; } if ((vfu_ctx->reg_info[bar_index].flags & VFU_REGION_FLAG_MEM)) { mask = PCI_BASE_ADDRESS_MEM_MASK; } else { mask = PCI_BASE_ADDRESS_IO_MASK; } cfg_addr |= (hdr->bars[bar_index].raw & ~mask); hdr->bars[bar_index].raw = htole32(cfg_addr); } #define BAR_INDEX(offset) ((offset - PCI_BASE_ADDRESS_0) >> 2) static int handle_command_write(vfu_ctx_t *ctx, vfu_pci_config_space_t *pci, const char *buf) { uint16_t v; assert(ctx != NULL); assert(pci != NULL); assert(buf != NULL); v = *(uint16_t*)buf; if ((v & PCI_COMMAND_IO) == PCI_COMMAND_IO) { if (!pci->hdr.cmd.iose) { pci->hdr.cmd.iose = 0x1; vfu_log(ctx, LOG_DEBUG, "I/O space enabled"); } v &= ~PCI_COMMAND_IO; } else { if (pci->hdr.cmd.iose) { pci->hdr.cmd.iose = 0x0; vfu_log(ctx, LOG_DEBUG, "I/O space disabled"); } } if ((v & PCI_COMMAND_MEMORY) == PCI_COMMAND_MEMORY) { if (!pci->hdr.cmd.mse) { pci->hdr.cmd.mse = 0x1; vfu_log(ctx, LOG_DEBUG, "memory space enabled"); } v &= ~PCI_COMMAND_MEMORY; } else { if (pci->hdr.cmd.mse) { pci->hdr.cmd.mse = 0x0; vfu_log(ctx, LOG_DEBUG, "memory space disabled"); } } if ((v & PCI_COMMAND_MASTER) == PCI_COMMAND_MASTER) { if (!pci->hdr.cmd.bme) { pci->hdr.cmd.bme = 0x1; vfu_log(ctx, LOG_DEBUG, "bus master enabled"); } v &= ~PCI_COMMAND_MASTER; } else { if (pci->hdr.cmd.bme) { pci->hdr.cmd.bme = 0x0; vfu_log(ctx, LOG_DEBUG, "bus master disabled"); } } if ((v & PCI_COMMAND_SERR) == PCI_COMMAND_SERR) { if (!pci->hdr.cmd.see) { pci->hdr.cmd.see = 0x1; vfu_log(ctx, LOG_DEBUG, "SERR# enabled"); } v &= ~PCI_COMMAND_SERR; } else { if (pci->hdr.cmd.see) { pci->hdr.cmd.see = 0x0; vfu_log(ctx, LOG_DEBUG, "SERR# disabled"); } } if ((v & PCI_COMMAND_INTX_DISABLE) == PCI_COMMAND_INTX_DISABLE) { if (!pci->hdr.cmd.id) { pci->hdr.cmd.id = 0x1; vfu_log(ctx, LOG_DEBUG, "INTx emulation disabled"); } v &= ~PCI_COMMAND_INTX_DISABLE; } else { if (pci->hdr.cmd.id) { pci->hdr.cmd.id = 0x0; vfu_log(ctx, LOG_DEBUG, "INTx emulation enabled"); } } if ((v & PCI_COMMAND_INVALIDATE) == PCI_COMMAND_INVALIDATE) { if (!pci->hdr.cmd.mwie) { pci->hdr.cmd.mwie = 1U; vfu_log(ctx, LOG_DEBUG, "memory write and invalidate enabled"); } v &= ~PCI_COMMAND_INVALIDATE; } else { if (pci->hdr.cmd.mwie) { pci->hdr.cmd.mwie = 0; vfu_log(ctx, LOG_DEBUG, "memory write and invalidate disabled"); } } if ((v & PCI_COMMAND_VGA_PALETTE) == PCI_COMMAND_VGA_PALETTE) { vfu_log(ctx, LOG_DEBUG, "enabling VGA palette snooping ignored"); v &= ~PCI_COMMAND_VGA_PALETTE; } if (v != 0) { vfu_log(ctx, LOG_ERR, "unconsumed command flags %x", v); return ERROR_INT(EINVAL); } return 0; } static int handle_erom_write(vfu_ctx_t *ctx, vfu_pci_config_space_t *pci, const char *buf) { uint32_t v; assert(ctx != NULL); assert(pci != NULL); v = *(uint32_t*)buf; if (v == (uint32_t)PCI_ROM_ADDRESS_MASK) { vfu_log(ctx, LOG_DEBUG, "write mask to EROM ignored"); } else if (v == 0) { vfu_log(ctx, LOG_DEBUG, "cleared EROM"); pci->hdr.erom = 0; } else if (v == (uint32_t)~PCI_ROM_ADDRESS_ENABLE) { vfu_log(ctx, LOG_DEBUG, "EROM disable ignored"); } else if (v == ~0U) { vfu_log(ctx, LOG_INFO, "EROM not implemented"); } else { vfu_log(ctx, LOG_ERR, "bad write to EROM 0x%x bytes", v); return ERROR_INT(EINVAL); } return 0; } static int pci_hdr_write(vfu_ctx_t *vfu_ctx, const char *buf, loff_t offset) { vfu_pci_config_space_t *cfg_space; int ret = 0; assert(vfu_ctx != NULL); assert(buf != NULL); cfg_space = vfu_pci_get_config_space(vfu_ctx); switch (offset) { case PCI_COMMAND: ret = handle_command_write(vfu_ctx, cfg_space, buf); break; case PCI_STATUS: /* FIXME ignoring write completely is wrong as some bits are RW1C */ vfu_log(vfu_ctx, LOG_INFO, "write to status ignored"); break; /* * According to the PCI spec, writing to read-only registers must be * ignored by the device. Some OSes tend to do this, e.g. FreeBSD. */ case offsetof(vfu_pci_hdr_t, rid): case offsetof(vfu_pci_hdr_t, cc): case offsetof(vfu_pci_hdr_t, intr.ipin): case offsetof(vfu_pci_hdr_t, mgnt): case offsetof(vfu_pci_hdr_t, mlat): break; case PCI_INTERRUPT_LINE: cfg_space->hdr.intr.iline = buf[0]; vfu_log(vfu_ctx, LOG_DEBUG, "ILINE=%0x", cfg_space->hdr.intr.iline); break; case PCI_CACHE_LINE_SIZE: cfg_space->hdr.cls = (uint8_t)buf[0]; vfu_log(vfu_ctx, LOG_DEBUG, "cache line size set to %#hhx", cfg_space->hdr.cls); break; case PCI_LATENCY_TIMER: cfg_space->hdr.mlt = (uint8_t)buf[0]; vfu_log(vfu_ctx, LOG_DEBUG, "set to latency timer to %hhx", cfg_space->hdr.mlt); break; case PCI_BASE_ADDRESS_0: case PCI_BASE_ADDRESS_1: case PCI_BASE_ADDRESS_2: case PCI_BASE_ADDRESS_3: case PCI_BASE_ADDRESS_4: case PCI_BASE_ADDRESS_5: pci_hdr_write_bar(vfu_ctx, BAR_INDEX(offset), buf); break; case PCI_ROM_ADDRESS: ret = handle_erom_write(vfu_ctx, cfg_space, buf); break; default: vfu_log(vfu_ctx, LOG_ERR, "PCI config write %#llx not handled", (ull_t)offset); ret = ERROR_INT(EINVAL); } return ret; } /* * Access to the standard PCI header at the given offset. */ static ssize_t pci_hdr_access(vfu_ctx_t *vfu_ctx, char *buf, size_t count, loff_t offset, bool is_write) { ssize_t ret; assert(count <= PCI_STD_HEADER_SIZEOF); if (is_write) { ret = pci_hdr_write(vfu_ctx, buf, offset); if (ret < 0) { vfu_log(vfu_ctx, LOG_ERR, "failed to write to PCI header: %m"); } else { ret = count; } } else { memcpy(buf, pci_config_space_ptr(vfu_ctx, offset), count); ret = count; } return ret; } /* * Access to the PCI config space that isn't handled by pci_hdr_access() or a * capability handler. */ ssize_t pci_nonstd_access(vfu_ctx_t *vfu_ctx, char *buf, size_t count, loff_t offset, bool is_write) { vfu_region_access_cb_t *cb = vfu_ctx->reg_info[VFU_PCI_DEV_CFG_REGION_IDX].cb; if (cb != NULL) { return cb(vfu_ctx, buf, count, offset, is_write); } if (is_write) { vfu_log(vfu_ctx, LOG_ERR, "no callback for write to config space " "offset %#llx size %zu", (ull_t)offset, count); return ERROR_INT(EINVAL); } memcpy(buf, pci_config_space_ptr(vfu_ctx, offset), count); return count; } #define PCI_REG_SZ(reg) \ [offsetof(vfu_pci_hdr_t, reg)] = sizeof(((vfu_pci_hdr_t *)0)->reg) static size_t pci_config_space_size_for_reg(loff_t offset) { static const size_t off2sz[] = { PCI_REG_SZ(id), PCI_REG_SZ(cmd), PCI_REG_SZ(sts), PCI_REG_SZ(rid), PCI_REG_SZ(cc), PCI_REG_SZ(cls), PCI_REG_SZ(mlt), PCI_REG_SZ(htype), PCI_REG_SZ(bist), PCI_REG_SZ(bars[0]), PCI_REG_SZ(bars[1]), PCI_REG_SZ(bars[2]), PCI_REG_SZ(bars[3]), PCI_REG_SZ(bars[4]), PCI_REG_SZ(bars[5]), PCI_REG_SZ(ccptr), PCI_REG_SZ(ss), PCI_REG_SZ(erom), PCI_REG_SZ(cap), PCI_REG_SZ(intr.iline), PCI_REG_SZ(intr.ipin), PCI_REG_SZ(mgnt), PCI_REG_SZ(mlat) }; assert(offset < PCI_STD_HEADER_SIZEOF); return off2sz[offset]; } /* * Returns the size of the next segment to access, which may be less than * @count: we might need to split up an access that straddles capabilities and * normal config space, for example. * * @cb is set to the callback to use for accessing the segment. */ static size_t pci_config_space_next_segment(vfu_ctx_t *ctx, size_t count, loff_t offset, bool is_write, vfu_region_access_cb_t **cb) { struct pci_cap *cap; if (offset < PCI_STD_HEADER_SIZEOF) { *cb = pci_hdr_access; if (is_write) { size_t reg_size = pci_config_space_size_for_reg(offset); if (reg_size == 0) { *cb = NULL; return 0; } count = MIN(count, reg_size); } else { count = MIN(count, (size_t)(PCI_STD_HEADER_SIZEOF - offset)); } return count; } cap = cap_find_by_offset(ctx, offset, count); if (cap == NULL) { *cb = pci_nonstd_access; return count; } /* If we have config space before the capability. */ if (offset < (loff_t)cap->off) { *cb = pci_nonstd_access; return cap->off - offset; } *cb = pci_cap_access; return MIN(count, cap->size); } /* * Special handler for config space: we handle all accesses to the standard PCI * header, as well as to any capabilities. * * Outside of those areas, if a callback is specified for the region, we'll use * that; otherwise, writes are not allowed, and reads are satisfied with * memcpy(). * * Returns the number of bytes handled, or -1 and errno on error. */ ssize_t pci_config_space_access(vfu_ctx_t *vfu_ctx, char *buf, size_t count, loff_t offset, bool is_write) { loff_t start = offset; ssize_t ret = 0; assert(vfu_ctx != NULL); while (count > 0) { vfu_region_access_cb_t *cb; size_t size; size = pci_config_space_next_segment(vfu_ctx, count, offset, is_write, &cb); if (cb == NULL) { vfu_log(vfu_ctx, LOG_ERR, "bad write to PCI config space %#llx-%#llx", (ull_t)offset, (ull_t)(offset + count - 1)); return size; } ret = cb(vfu_ctx, buf, size, offset, is_write); // FIXME: partial reads, still return an error? if (ret < 0) { return ret; } offset += ret; count -= ret; buf += ret; } return offset - start; } EXPORT int vfu_pci_init(vfu_ctx_t *vfu_ctx, vfu_pci_type_t pci_type, int hdr_type, int revision UNUSED) { vfu_pci_config_space_t *cfg_space; size_t size; assert(vfu_ctx != NULL); switch (pci_type) { case VFU_PCI_TYPE_CONVENTIONAL: case VFU_PCI_TYPE_PCI_X_1: size = PCI_CFG_SPACE_SIZE; break; case VFU_PCI_TYPE_PCI_X_2: case VFU_PCI_TYPE_EXPRESS: size = PCI_CFG_SPACE_EXP_SIZE; break; default: vfu_log(vfu_ctx, LOG_ERR, "invalid PCI type %u", pci_type); return ERROR_INT(EINVAL); } if (hdr_type != PCI_HEADER_TYPE_NORMAL) { vfu_log(vfu_ctx, LOG_ERR, "invalid PCI header type %d", hdr_type); return ERROR_INT(EINVAL); } /* * TODO there no real reason why we shouldn't allow this, we should just * clean up and redo it. */ if (vfu_ctx->pci.config_space != NULL) { vfu_log(vfu_ctx, LOG_ERR, "PCI configuration space header already setup"); return ERROR_INT(EEXIST); } // Allocate a buffer for the config space. cfg_space = calloc(1, size); if (cfg_space == NULL) { return ERROR_INT(ENOMEM); } vfu_ctx->pci.type = pci_type; vfu_ctx->pci.config_space = cfg_space; vfu_ctx->reg_info[VFU_PCI_DEV_CFG_REGION_IDX].size = size; vfu_ctx->reg_info[VFU_PCI_DEV_CFG_REGION_IDX].flags = VFU_REGION_FLAG_RW; return 0; } EXPORT void vfu_pci_set_id(vfu_ctx_t *vfu_ctx, uint16_t vid, uint16_t did, uint16_t ssvid, uint16_t ssid) { vfu_ctx->pci.config_space->hdr.id.vid = vid; vfu_ctx->pci.config_space->hdr.id.did = did; vfu_ctx->pci.config_space->hdr.ss.vid = ssvid; vfu_ctx->pci.config_space->hdr.ss.sid = ssid; } EXPORT void vfu_pci_set_class(vfu_ctx_t *vfu_ctx, uint8_t base, uint8_t sub, uint8_t pi) { vfu_ctx->pci.config_space->hdr.cc.bcc = base; vfu_ctx->pci.config_space->hdr.cc.scc = sub; vfu_ctx->pci.config_space->hdr.cc.pi = pi; } EXPORT vfu_pci_config_space_t * vfu_pci_get_config_space(vfu_ctx_t *vfu_ctx) { assert(vfu_ctx != NULL); return vfu_ctx->pci.config_space; } /* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */