diff options
Diffstat (limited to 'hw/pci')
-rw-r--r-- | hw/pci/msix.c | 2 | ||||
-rw-r--r-- | hw/pci/pci.c | 263 | ||||
-rw-r--r-- | hw/pci/pci_host.c | 6 | ||||
-rw-r--r-- | hw/pci/pcie.c | 86 | ||||
-rw-r--r-- | hw/pci/pcie_sriov.c | 59 |
5 files changed, 356 insertions, 60 deletions
diff --git a/hw/pci/msix.c b/hw/pci/msix.c index 66f27b9..8c7f670 100644 --- a/hw/pci/msix.c +++ b/hw/pci/msix.c @@ -72,7 +72,7 @@ static uint8_t *msix_pending_byte(PCIDevice *dev, int vector) return dev->msix_pba + vector / 8; } -static int msix_is_pending(PCIDevice *dev, int vector) +int msix_is_pending(PCIDevice *dev, unsigned int vector) { return *msix_pending_byte(dev, vector) & msix_pending_mask(vector); } diff --git a/hw/pci/pci.c b/hw/pci/pci.c index f5ab510..acc03fd 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -32,6 +32,7 @@ #include "hw/pci/pci_host.h" #include "hw/qdev-properties.h" #include "hw/qdev-properties-system.h" +#include "migration/cpr.h" #include "migration/qemu-file-types.h" #include "migration/vmstate.h" #include "net/net.h" @@ -128,6 +129,12 @@ static GSequence *pci_acpi_index_list(void) return used_acpi_index_list; } +static void pci_set_master(PCIDevice *d, bool enable) +{ + memory_region_set_enabled(&d->bus_master_enable_region, enable); + d->is_master = enable; /* cache the status */ +} + static void pci_init_bus_master(PCIDevice *pci_dev) { AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev); @@ -135,7 +142,7 @@ static void pci_init_bus_master(PCIDevice *pci_dev) memory_region_init_alias(&pci_dev->bus_master_enable_region, OBJECT(pci_dev), "bus master", dma_as->root, 0, memory_region_size(dma_as->root)); - memory_region_set_enabled(&pci_dev->bus_master_enable_region, false); + pci_set_master(pci_dev, false); memory_region_add_subregion(&pci_dev->bus_master_container_region, 0, &pci_dev->bus_master_enable_region); } @@ -531,6 +538,10 @@ static void pci_reset_regions(PCIDevice *dev) static void pci_do_device_reset(PCIDevice *dev) { + if ((dev->cap_present & QEMU_PCI_SKIP_RESET_ON_CPR) && cpr_is_incoming()) { + return; + } + pci_device_deassert_intx(dev); assert(dev->irq_state == 0); @@ -804,9 +815,8 @@ static int get_pci_config_device(QEMUFile *f, void *pv, size_t size, pci_bridge_update_mappings(PCI_BRIDGE(s)); } - memory_region_set_enabled(&s->bus_master_enable_region, - pci_get_word(s->config + PCI_COMMAND) - & PCI_COMMAND_MASTER); + pci_set_master(s, pci_get_word(s->config + PCI_COMMAND) + & PCI_COMMAND_MASTER); g_free(config); return 0; @@ -916,7 +926,7 @@ void pci_device_save(PCIDevice *s, QEMUFile *f) * This makes us compatible with old devices * which never set or clear this bit. */ s->config[PCI_STATUS] &= ~PCI_STATUS_INTERRUPT; - vmstate_save_state(f, &vmstate_pci_device, s, NULL); + vmstate_save_state(f, &vmstate_pci_device, s, NULL, &error_fatal); /* Restore the interrupt status bit. */ pci_update_irq_status(s); } @@ -924,7 +934,8 @@ void pci_device_save(PCIDevice *s, QEMUFile *f) int pci_device_load(PCIDevice *s, QEMUFile *f) { int ret; - ret = vmstate_load_state(f, &vmstate_pci_device, s, s->version_id); + ret = vmstate_load_state(f, &vmstate_pci_device, s, s->version_id, + &error_fatal); /* Restore the interrupt status bit. */ pci_update_irq_status(s); return ret; @@ -974,14 +985,15 @@ static int pci_parse_devaddr(const char *addr, int *domp, int *busp, slot = val; - if (funcp != NULL) { - if (*e != '.') + if (funcp != NULL && *e != '\0') { + if (*e != '.') { return -1; - + } p = e + 1; val = strtoul(p, &e, 16); - if (e == p) + if (e == p) { return -1; + } func = val; } @@ -1480,9 +1492,6 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num, : pci_get_bus(pci_dev)->address_space_mem; if (pci_is_vf(pci_dev)) { - PCIDevice *pf = pci_dev->exp.sriov_vf.pf; - assert(!pf || type == pf->exp.sriov_pf.vf_bar_type[region_num]); - r->addr = pci_bar_address(pci_dev, region_num, r->type, r->size); if (r->addr != PCI_BAR_UNMAPPED) { memory_region_add_subregion_overlap(r->address_space, @@ -1725,7 +1734,7 @@ static void pci_update_mappings(PCIDevice *d) pci_update_vga(d); } -static inline int pci_irq_disabled(PCIDevice *d) +int pci_irq_disabled(PCIDevice *d) { return pci_get_word(d->config + PCI_COMMAND) & PCI_COMMAND_INTX_DISABLE; } @@ -1787,9 +1796,8 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int if (ranges_overlap(addr, l, PCI_COMMAND, 2)) { pci_update_irq_disabled(d, was_irq_disabled); - memory_region_set_enabled(&d->bus_master_enable_region, - (pci_get_word(d->config + PCI_COMMAND) - & PCI_COMMAND_MASTER) && d->enabled); + pci_set_master(d, (pci_get_word(d->config + PCI_COMMAND) & + PCI_COMMAND_MASTER) && d->enabled); } msi_write_config(d, addr, val_in, l); @@ -2045,13 +2053,15 @@ bool pci_init_nic_in_slot(PCIBus *rootbus, const char *model, int dom, busnr, devfn; PCIDevice *pci_dev; unsigned slot; + unsigned func; + PCIBus *bus; if (!nd) { return false; } - if (!devaddr || pci_parse_devaddr(devaddr, &dom, &busnr, &slot, NULL) < 0) { + if (!devaddr || pci_parse_devaddr(devaddr, &dom, &busnr, &slot, &func) < 0) { error_report("Invalid PCI device address %s for device %s", devaddr, model); exit(1); @@ -2062,7 +2072,7 @@ bool pci_init_nic_in_slot(PCIBus *rootbus, const char *model, exit(1); } - devfn = PCI_DEVFN(slot, 0); + devfn = PCI_DEVFN(slot, func); bus = pci_find_bus_nr(rootbus, busnr); if (!bus) { @@ -2900,6 +2910,19 @@ static void pci_device_get_iommu_bus_devfn(PCIDevice *dev, } } + /* + * When multiple PCI Express Root Buses are defined using pxb-pcie, + * the IOMMU configuration may be specific to each root bus. However, + * pxb-pcie acts as a special root complex whose parent is effectively + * the default root complex(pcie.0). Ensure that we retrieve the + * correct IOMMU ops(if any) in such cases. + */ + if (pci_bus_is_express(iommu_bus) && pci_bus_is_root(iommu_bus)) { + if (parent_bus->iommu_per_bus) { + break; + } + } + iommu_bus = parent_bus; } @@ -2935,6 +2958,23 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) return &address_space_memory; } +int pci_iommu_init_iotlb_notifier(PCIDevice *dev, IOMMUNotifier *n, + IOMMUNotify fn, void *opaque) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + pci_device_get_iommu_bus_devfn(dev, &iommu_bus, &bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->init_iotlb_notifier) { + iommu_bus->iommu_ops->init_iotlb_notifier(bus, iommu_bus->iommu_opaque, + devfn, n, fn, opaque); + return 0; + } + + return -ENODEV; +} + bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod, Error **errp) { @@ -2966,6 +3006,168 @@ void pci_device_unset_iommu_device(PCIDevice *dev) } } +int pci_pri_request_page(PCIDevice *dev, uint32_t pasid, bool priv_req, + bool exec_req, hwaddr addr, bool lpig, + uint16_t prgi, bool is_read, bool is_write) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + if (!dev->is_master || + ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) { + return -EPERM; + } + + if (!pcie_pri_enabled(dev)) { + return -EPERM; + } + + pci_device_get_iommu_bus_devfn(dev, &iommu_bus, &bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->pri_request_page) { + return iommu_bus->iommu_ops->pri_request_page(bus, + iommu_bus->iommu_opaque, + devfn, pasid, priv_req, + exec_req, addr, lpig, prgi, + is_read, is_write); + } + + return -ENODEV; +} + +int pci_pri_register_notifier(PCIDevice *dev, uint32_t pasid, + IOMMUPRINotifier *notifier) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + if (!dev->is_master || + ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) { + return -EPERM; + } + + pci_device_get_iommu_bus_devfn(dev, &iommu_bus, &bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->pri_register_notifier) { + iommu_bus->iommu_ops->pri_register_notifier(bus, + iommu_bus->iommu_opaque, + devfn, pasid, notifier); + return 0; + } + + return -ENODEV; +} + +void pci_pri_unregister_notifier(PCIDevice *dev, uint32_t pasid) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + pci_device_get_iommu_bus_devfn(dev, &iommu_bus, &bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->pri_unregister_notifier) { + iommu_bus->iommu_ops->pri_unregister_notifier(bus, + iommu_bus->iommu_opaque, + devfn, pasid); + } +} + +ssize_t pci_ats_request_translation(PCIDevice *dev, uint32_t pasid, + bool priv_req, bool exec_req, + hwaddr addr, size_t length, + bool no_write, IOMMUTLBEntry *result, + size_t result_length, + uint32_t *err_count) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + if (!dev->is_master || + ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) { + return -EPERM; + } + + if (result_length == 0) { + return -ENOSPC; + } + + if (!pcie_ats_enabled(dev)) { + return -EPERM; + } + + pci_device_get_iommu_bus_devfn(dev, &iommu_bus, &bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->ats_request_translation) { + return iommu_bus->iommu_ops->ats_request_translation(bus, + iommu_bus->iommu_opaque, + devfn, pasid, priv_req, + exec_req, addr, length, + no_write, result, + result_length, err_count); + } + + return -ENODEV; +} + +int pci_iommu_register_iotlb_notifier(PCIDevice *dev, uint32_t pasid, + IOMMUNotifier *n) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + if ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev)) { + return -EPERM; + } + + pci_device_get_iommu_bus_devfn(dev, &iommu_bus, &bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->register_iotlb_notifier) { + iommu_bus->iommu_ops->register_iotlb_notifier(bus, + iommu_bus->iommu_opaque, devfn, + pasid, n); + return 0; + } + + return -ENODEV; +} + +int pci_iommu_unregister_iotlb_notifier(PCIDevice *dev, uint32_t pasid, + IOMMUNotifier *n) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + if ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev)) { + return -EPERM; + } + + pci_device_get_iommu_bus_devfn(dev, &iommu_bus, &bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->unregister_iotlb_notifier) { + iommu_bus->iommu_ops->unregister_iotlb_notifier(bus, + iommu_bus->iommu_opaque, + devfn, pasid, n); + return 0; + } + + return -ENODEV; +} + +int pci_iommu_get_iotlb_info(PCIDevice *dev, uint8_t *addr_width, + uint32_t *min_page_size) +{ + PCIBus *iommu_bus; + + pci_device_get_iommu_bus_devfn(dev, &iommu_bus, NULL, NULL); + if (iommu_bus && iommu_bus->iommu_ops->get_iotlb_info) { + iommu_bus->iommu_ops->get_iotlb_info(iommu_bus->iommu_opaque, + addr_width, min_page_size); + return 0; + } + + return -ENODEV; +} + void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque) { /* @@ -2979,6 +3181,24 @@ void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque) bus->iommu_opaque = opaque; } +/* + * Similar to pci_setup_iommu(), but sets iommu_per_bus to true, + * indicating that the IOMMU is specific to this bus. This is used by + * IOMMU implementations that are tied to a specific PCIe root complex. + * + * In QEMU, pxb-pcie behaves as a special root complex whose parent is + * effectively the default root complex (pcie.0). The iommu_per_bus + * is checked in pci_device_get_iommu_bus_devfn() to ensure the correct + * IOMMU ops are returned, avoiding the use of the parent’s IOMMU when + * it's not appropriate. + */ +void pci_setup_iommu_per_bus(PCIBus *bus, const PCIIOMMUOps *ops, + void *opaque) +{ + pci_setup_iommu(bus, ops, opaque); + bus->iommu_per_bus = true; +} + static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque) { Range *range = opaque; @@ -3100,9 +3320,8 @@ void pci_set_enabled(PCIDevice *d, bool state) d->enabled = state; pci_update_mappings(d); - memory_region_set_enabled(&d->bus_master_enable_region, - (pci_get_word(d->config + PCI_COMMAND) - & PCI_COMMAND_MASTER) && d->enabled); + pci_set_master(d, (pci_get_word(d->config + PCI_COMMAND) + & PCI_COMMAND_MASTER) && d->enabled); if (qdev_is_realized(&d->qdev)) { pci_device_reset(d); } diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c index abe83bb..7179d99 100644 --- a/hw/pci/pci_host.c +++ b/hw/pci/pci_host.c @@ -217,12 +217,6 @@ const MemoryRegionOps pci_host_data_le_ops = { .endianness = DEVICE_LITTLE_ENDIAN, }; -const MemoryRegionOps pci_host_data_be_ops = { - .read = pci_host_data_read, - .write = pci_host_data_write, - .endianness = DEVICE_BIG_ENDIAN, -}; - static bool pci_host_needed(void *opaque) { PCIHostState *s = opaque; diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 1b12db6..b302de6 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -1214,3 +1214,89 @@ void pcie_acs_reset(PCIDevice *dev) pci_set_word(dev->config + dev->exp.acs_cap + PCI_ACS_CTRL, 0); } } + +/* PASID */ +void pcie_pasid_init(PCIDevice *dev, uint16_t offset, uint8_t pasid_width, + bool exec_perm, bool priv_mod) +{ + static const uint16_t control_reg_rw_mask = 0x07; + uint16_t capability_reg; + + assert(pasid_width <= PCI_EXT_CAP_PASID_MAX_WIDTH); + + pcie_add_capability(dev, PCI_EXT_CAP_ID_PASID, PCI_PASID_VER, offset, + PCI_EXT_CAP_PASID_SIZEOF); + + capability_reg = ((uint16_t)pasid_width) << PCI_PASID_CAP_WIDTH_SHIFT; + capability_reg |= exec_perm ? PCI_PASID_CAP_EXEC : 0; + capability_reg |= priv_mod ? PCI_PASID_CAP_PRIV : 0; + pci_set_word(dev->config + offset + PCI_PASID_CAP, capability_reg); + + /* Everything is disabled by default */ + pci_set_word(dev->config + offset + PCI_PASID_CTRL, 0); + + pci_set_word(dev->wmask + offset + PCI_PASID_CTRL, control_reg_rw_mask); + + dev->exp.pasid_cap = offset; +} + +/* PRI */ +void pcie_pri_init(PCIDevice *dev, uint16_t offset, uint32_t outstanding_pr_cap, + bool prg_response_pasid_req) +{ + static const uint16_t control_reg_rw_mask = 0x3; + static const uint16_t status_reg_rw1_mask = 0x3; + static const uint32_t pr_alloc_reg_rw_mask = 0xffffffff; + uint16_t status_reg; + + status_reg = prg_response_pasid_req ? PCI_PRI_STATUS_PASID : 0; + status_reg |= PCI_PRI_STATUS_STOPPED; /* Stopped by default */ + + pcie_add_capability(dev, PCI_EXT_CAP_ID_PRI, PCI_PRI_VER, offset, + PCI_EXT_CAP_PRI_SIZEOF); + /* Disabled by default */ + + pci_set_word(dev->config + offset + PCI_PRI_STATUS, status_reg); + pci_set_long(dev->config + offset + PCI_PRI_MAX_REQ, outstanding_pr_cap); + + pci_set_word(dev->wmask + offset + PCI_PRI_CTRL, control_reg_rw_mask); + pci_set_word(dev->w1cmask + offset + PCI_PRI_STATUS, status_reg_rw1_mask); + pci_set_long(dev->wmask + offset + PCI_PRI_ALLOC_REQ, pr_alloc_reg_rw_mask); + + dev->exp.pri_cap = offset; +} + +uint32_t pcie_pri_get_req_alloc(const PCIDevice *dev) +{ + if (!pcie_pri_enabled(dev)) { + return 0; + } + return pci_get_long(dev->config + dev->exp.pri_cap + PCI_PRI_ALLOC_REQ); +} + +bool pcie_pri_enabled(const PCIDevice *dev) +{ + if (!pci_is_express(dev) || !dev->exp.pri_cap) { + return false; + } + return (pci_get_word(dev->config + dev->exp.pri_cap + PCI_PRI_CTRL) & + PCI_PRI_CTRL_ENABLE) != 0; +} + +bool pcie_pasid_enabled(const PCIDevice *dev) +{ + if (!pci_is_express(dev) || !dev->exp.pasid_cap) { + return false; + } + return (pci_get_word(dev->config + dev->exp.pasid_cap + PCI_PASID_CTRL) & + PCI_PASID_CTRL_ENABLE) != 0; +} + +bool pcie_ats_enabled(const PCIDevice *dev) +{ + if (!pci_is_express(dev) || !dev->exp.ats_cap) { + return false; + } + return (pci_get_word(dev->config + dev->exp.ats_cap + PCI_ATS_CTRL) & + PCI_ATS_CTRL_ENABLE) != 0; +} diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index 3ad1874..c4f88f0 100644 --- a/hw/pci/pcie_sriov.c +++ b/hw/pci/pcie_sriov.c @@ -64,6 +64,27 @@ static void unregister_vfs(PCIDevice *dev) pci_set_word(dev->wmask + dev->exp.sriov_cap + PCI_SRIOV_NUM_VF, 0xffff); } +static void consume_config(PCIDevice *dev) +{ + uint8_t *cfg = dev->config + dev->exp.sriov_cap; + + if (pci_get_word(cfg + PCI_SRIOV_CTRL) & PCI_SRIOV_CTRL_VFE) { + register_vfs(dev); + } else { + uint8_t *wmask = dev->wmask + dev->exp.sriov_cap; + uint16_t num_vfs = pci_get_word(cfg + PCI_SRIOV_NUM_VF); + uint16_t wmask_val = PCI_SRIOV_CTRL_MSE | PCI_SRIOV_CTRL_ARI; + + unregister_vfs(dev); + + if (num_vfs <= pci_get_word(cfg + PCI_SRIOV_TOTAL_VF)) { + wmask_val |= PCI_SRIOV_CTRL_VFE; + } + + pci_set_word(wmask + PCI_SRIOV_CTRL, wmask_val); + } +} + static bool pcie_sriov_pf_init_common(PCIDevice *dev, uint16_t offset, uint16_t vf_dev_id, uint16_t init_vfs, uint16_t total_vfs, uint16_t vf_offset, @@ -174,7 +195,9 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, void pcie_sriov_pf_exit(PCIDevice *dev) { - uint8_t *cfg = dev->config + dev->exp.sriov_cap; + if (dev->exp.sriov_cap == 0) { + return; + } if (dev->exp.sriov_pf.vf_user_created) { uint16_t ven_id = pci_get_word(dev->config + PCI_VENDOR_ID); @@ -190,6 +213,8 @@ void pcie_sriov_pf_exit(PCIDevice *dev) pci_config_set_device_id(dev->exp.sriov_pf.vf[i]->config, vf_dev_id); } } else { + uint8_t *cfg = dev->config + dev->exp.sriov_cap; + unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF)); } } @@ -221,17 +246,6 @@ void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num, dev->exp.sriov_pf.vf_bar_type[region_num] = type; } -void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num, - MemoryRegion *memory) -{ - uint8_t type; - - assert(dev->exp.sriov_vf.pf); - type = dev->exp.sriov_vf.pf->exp.sriov_pf.vf_bar_type[region_num]; - - return pci_register_bar(dev, region_num, type, memory); -} - static gint compare_vf_devfns(gconstpointer a, gconstpointer b) { return (*(PCIDevice **)a)->devfn - (*(PCIDevice **)b)->devfn; @@ -416,30 +430,13 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, trace_sriov_config_write(dev->name, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), off, val, len); - if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) { - if (val & PCI_SRIOV_CTRL_VFE) { - register_vfs(dev); - } else { - unregister_vfs(dev); - } - } else if (range_covers_byte(off, len, PCI_SRIOV_NUM_VF)) { - uint8_t *cfg = dev->config + sriov_cap; - uint8_t *wmask = dev->wmask + sriov_cap; - uint16_t num_vfs = pci_get_word(cfg + PCI_SRIOV_NUM_VF); - uint16_t wmask_val = PCI_SRIOV_CTRL_MSE | PCI_SRIOV_CTRL_ARI; - - if (num_vfs <= pci_get_word(cfg + PCI_SRIOV_TOTAL_VF)) { - wmask_val |= PCI_SRIOV_CTRL_VFE; - } - - pci_set_word(wmask + PCI_SRIOV_CTRL, wmask_val); - } + consume_config(dev); } void pcie_sriov_pf_post_load(PCIDevice *dev) { if (dev->exp.sriov_cap) { - register_vfs(dev); + consume_config(dev); } } |