aboutsummaryrefslogtreecommitdiff
path: root/hw/pci
diff options
context:
space:
mode:
Diffstat (limited to 'hw/pci')
-rw-r--r--hw/pci/msix.c2
-rw-r--r--hw/pci/pci.c306
-rw-r--r--hw/pci/pci_host.c6
-rw-r--r--hw/pci/pcie.c78
-rw-r--r--hw/pci/pcie_port.c4
-rw-r--r--hw/pci/pcie_sriov.c294
-rw-r--r--hw/pci/trace-events4
7 files changed, 583 insertions, 111 deletions
diff --git a/hw/pci/msix.c b/hw/pci/msix.c
index 66f27b9..8c7f670 100644
--- a/hw/pci/msix.c
+++ b/hw/pci/msix.c
@@ -72,7 +72,7 @@ static uint8_t *msix_pending_byte(PCIDevice *dev, int vector)
return dev->msix_pba + vector / 8;
}
-static int msix_is_pending(PCIDevice *dev, int vector)
+int msix_is_pending(PCIDevice *dev, unsigned int vector)
{
return *msix_pending_byte(dev, vector) & msix_pending_mask(vector);
}
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index fe38c4c..c70b5ce 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -32,6 +32,7 @@
#include "hw/pci/pci_host.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
+#include "migration/cpr.h"
#include "migration/qemu-file-types.h"
#include "migration/vmstate.h"
#include "net/net.h"
@@ -54,13 +55,6 @@
#include "hw/xen/xen.h"
#include "hw/i386/kvm/xen_evtchn.h"
-//#define DEBUG_PCI
-#ifdef DEBUG_PCI
-# define PCI_DPRINTF(format, ...) printf(format, ## __VA_ARGS__)
-#else
-# define PCI_DPRINTF(format, ...) do { } while (0)
-#endif
-
bool pci_available = true;
static char *pcibus_get_dev_path(DeviceState *dev);
@@ -101,6 +95,7 @@ static const Property pci_props[] = {
QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
DEFINE_PROP_SIZE32("x-max-bounce-buffer-size", PCIDevice,
max_bounce_buffer_size, DEFAULT_MAX_BOUNCE_BUFFER_SIZE),
+ DEFINE_PROP_STRING("sriov-pf", PCIDevice, sriov_pf),
DEFINE_PROP_BIT("x-pcie-ext-tag", PCIDevice, cap_present,
QEMU_PCIE_EXT_TAG_BITNR, true),
{ .name = "busnr", .info = &prop_pci_busnr },
@@ -134,6 +129,12 @@ static GSequence *pci_acpi_index_list(void)
return used_acpi_index_list;
}
+static void pci_set_master(PCIDevice *d, bool enable)
+{
+ memory_region_set_enabled(&d->bus_master_enable_region, enable);
+ d->is_master = enable; /* cache the status */
+}
+
static void pci_init_bus_master(PCIDevice *pci_dev)
{
AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
@@ -141,7 +142,7 @@ static void pci_init_bus_master(PCIDevice *pci_dev)
memory_region_init_alias(&pci_dev->bus_master_enable_region,
OBJECT(pci_dev), "bus master",
dma_as->root, 0, memory_region_size(dma_as->root));
- memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
+ pci_set_master(pci_dev, false);
memory_region_add_subregion(&pci_dev->bus_master_container_region, 0,
&pci_dev->bus_master_enable_region);
}
@@ -537,6 +538,10 @@ static void pci_reset_regions(PCIDevice *dev)
static void pci_do_device_reset(PCIDevice *dev)
{
+ if ((dev->cap_present & QEMU_PCI_SKIP_RESET_ON_CPR) && cpr_is_incoming()) {
+ return;
+ }
+
pci_device_deassert_intx(dev);
assert(dev->irq_state == 0);
@@ -810,9 +815,8 @@ static int get_pci_config_device(QEMUFile *f, void *pv, size_t size,
pci_bridge_update_mappings(PCI_BRIDGE(s));
}
- memory_region_set_enabled(&s->bus_master_enable_region,
- pci_get_word(s->config + PCI_COMMAND)
- & PCI_COMMAND_MASTER);
+ pci_set_master(s, pci_get_word(s->config + PCI_COMMAND)
+ & PCI_COMMAND_MASTER);
g_free(config);
return 0;
@@ -1112,13 +1116,8 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp)
dev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
}
- /*
- * With SR/IOV and ARI, a device at function 0 need not be a multifunction
- * device, as it may just be a VF that ended up with function 0 in
- * the legacy PCI interpretation. Avoid failing in such cases:
- */
- if (pci_is_vf(dev) &&
- dev->exp.sriov_vf.pf->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
+ /* SR/IOV is not handled here. */
+ if (pci_is_vf(dev)) {
return;
}
@@ -1151,7 +1150,8 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp)
}
/* function 0 indicates single function, so function > 0 must be NULL */
for (func = 1; func < PCI_FUNC_MAX; ++func) {
- if (bus->devices[PCI_DEVFN(slot, func)]) {
+ PCIDevice *device = bus->devices[PCI_DEVFN(slot, func)];
+ if (device && !pci_is_vf(device)) {
error_setg(errp, "PCI: %x.0 indicates single function, "
"but %x.%x is already populated.",
slot, slot, func);
@@ -1439,6 +1439,7 @@ static void pci_qdev_unrealize(DeviceState *dev)
pci_unregister_io_regions(pci_dev);
pci_del_option_rom(pci_dev);
+ pcie_sriov_unregister_device(pci_dev);
if (pc->exit) {
pc->exit(pci_dev);
@@ -1470,7 +1471,6 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
pcibus_t size = memory_region_size(memory);
uint8_t hdr_type;
- assert(!pci_is_vf(pci_dev)); /* VFs must use pcie_sriov_vf_register_bar */
assert(region_num >= 0);
assert(region_num < PCI_NUM_REGIONS);
assert(is_power_of_2(size));
@@ -1482,7 +1482,6 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
r = &pci_dev->io_regions[region_num];
assert(!r->size);
- r->addr = PCI_BAR_UNMAPPED;
r->size = size;
r->type = type;
r->memory = memory;
@@ -1490,22 +1489,35 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
? pci_get_bus(pci_dev)->address_space_io
: pci_get_bus(pci_dev)->address_space_mem;
- wmask = ~(size - 1);
- if (region_num == PCI_ROM_SLOT) {
- /* ROM enable bit is writable */
- wmask |= PCI_ROM_ADDRESS_ENABLE;
- }
-
- addr = pci_bar(pci_dev, region_num);
- pci_set_long(pci_dev->config + addr, type);
+ if (pci_is_vf(pci_dev)) {
+ PCIDevice *pf = pci_dev->exp.sriov_vf.pf;
+ assert(!pf || type == pf->exp.sriov_pf.vf_bar_type[region_num]);
- if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) &&
- r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
- pci_set_quad(pci_dev->wmask + addr, wmask);
- pci_set_quad(pci_dev->cmask + addr, ~0ULL);
+ r->addr = pci_bar_address(pci_dev, region_num, r->type, r->size);
+ if (r->addr != PCI_BAR_UNMAPPED) {
+ memory_region_add_subregion_overlap(r->address_space,
+ r->addr, r->memory, 1);
+ }
} else {
- pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff);
- pci_set_long(pci_dev->cmask + addr, 0xffffffff);
+ r->addr = PCI_BAR_UNMAPPED;
+
+ wmask = ~(size - 1);
+ if (region_num == PCI_ROM_SLOT) {
+ /* ROM enable bit is writable */
+ wmask |= PCI_ROM_ADDRESS_ENABLE;
+ }
+
+ addr = pci_bar(pci_dev, region_num);
+ pci_set_long(pci_dev->config + addr, type);
+
+ if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) &&
+ r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+ pci_set_quad(pci_dev->wmask + addr, wmask);
+ pci_set_quad(pci_dev->cmask + addr, ~0ULL);
+ } else {
+ pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff);
+ pci_set_long(pci_dev->cmask + addr, 0xffffffff);
+ }
}
}
@@ -1594,7 +1606,11 @@ static pcibus_t pci_config_get_bar_addr(PCIDevice *d, int reg,
pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_OFFSET);
uint16_t vf_stride =
pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_STRIDE);
- uint32_t vf_num = (d->devfn - (pf->devfn + vf_offset)) / vf_stride;
+ uint32_t vf_num = d->devfn - (pf->devfn + vf_offset);
+
+ if (vf_num) {
+ vf_num /= vf_stride;
+ }
if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
new_addr = pci_get_quad(pf->config + bar);
@@ -1719,7 +1735,7 @@ static void pci_update_mappings(PCIDevice *d)
pci_update_vga(d);
}
-static inline int pci_irq_disabled(PCIDevice *d)
+int pci_irq_disabled(PCIDevice *d)
{
return pci_get_word(d->config + PCI_COMMAND) & PCI_COMMAND_INTX_DISABLE;
}
@@ -1781,9 +1797,8 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int
if (ranges_overlap(addr, l, PCI_COMMAND, 2)) {
pci_update_irq_disabled(d, was_irq_disabled);
- memory_region_set_enabled(&d->bus_master_enable_region,
- (pci_get_word(d->config + PCI_COMMAND)
- & PCI_COMMAND_MASTER) && d->enabled);
+ pci_set_master(d, (pci_get_word(d->config + PCI_COMMAND) &
+ PCI_COMMAND_MASTER) && d->enabled);
}
msi_write_config(d, addr, val_in, l);
@@ -2268,6 +2283,11 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
}
}
+ if (!pcie_sriov_register_device(pci_dev, errp)) {
+ pci_qdev_unrealize(DEVICE(pci_dev));
+ return;
+ }
+
/*
* A PCIe Downstream Port that do not have ARI Forwarding enabled must
* associate only Device 0 with the device attached to the bus
@@ -2439,12 +2459,12 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size)
/* Only a valid rom will be patched. */
rom_magic = pci_get_word(ptr);
if (rom_magic != 0xaa55) {
- PCI_DPRINTF("Bad ROM magic %04x\n", rom_magic);
+ trace_pci_bad_rom_magic(rom_magic, 0xaa55);
return;
}
pcir_offset = pci_get_word(ptr + 0x18);
if (pcir_offset + 8 >= size || memcmp(ptr + pcir_offset, "PCIR", 4)) {
- PCI_DPRINTF("Bad PCIR offset 0x%x or signature\n", pcir_offset);
+ trace_pci_bad_pcir_offset(pcir_offset);
return;
}
@@ -2453,8 +2473,8 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size)
rom_vendor_id = pci_get_word(ptr + pcir_offset + 4);
rom_device_id = pci_get_word(ptr + pcir_offset + 6);
- PCI_DPRINTF("%s: ROM id %04x%04x / PCI id %04x%04x\n", pdev->romfile,
- vendor_id, device_id, rom_vendor_id, rom_device_id);
+ trace_pci_rom_and_pci_ids(pdev->romfile, vendor_id, device_id,
+ rom_vendor_id, rom_device_id);
checksum = ptr[6];
@@ -2462,7 +2482,7 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size)
/* Patch vendor id and checksum (at offset 6 for etherboot roms). */
checksum += (uint8_t)rom_vendor_id + (uint8_t)(rom_vendor_id >> 8);
checksum -= (uint8_t)vendor_id + (uint8_t)(vendor_id >> 8);
- PCI_DPRINTF("ROM checksum %02x / %02x\n", ptr[6], checksum);
+ trace_pci_rom_checksum_change(ptr[6], checksum);
ptr[6] = checksum;
pci_set_word(ptr + pcir_offset + 4, vendor_id);
}
@@ -2471,7 +2491,7 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size)
/* Patch device id and checksum (at offset 6 for etherboot roms). */
checksum += (uint8_t)rom_device_id + (uint8_t)(rom_device_id >> 8);
checksum -= (uint8_t)device_id + (uint8_t)(device_id >> 8);
- PCI_DPRINTF("ROM checksum %02x / %02x\n", ptr[6], checksum);
+ trace_pci_rom_checksum_change(ptr[6], checksum);
ptr[6] = checksum;
pci_set_word(ptr + pcir_offset + 6, device_id);
}
@@ -2522,6 +2542,14 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom,
return;
}
+ if (pci_is_vf(pdev)) {
+ if (pdev->rom_bar > 0) {
+ error_setg(errp, "ROM BAR cannot be enabled for SR-IOV VF");
+ }
+
+ return;
+ }
+
if (load_file || pdev->romsize == UINT32_MAX) {
path = qemu_find_file(QEMU_FILE_TYPE_BIOS, pdev->romfile);
if (path == NULL) {
@@ -2916,6 +2944,23 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
return &address_space_memory;
}
+int pci_iommu_init_iotlb_notifier(PCIDevice *dev, IOMMUNotifier *n,
+ IOMMUNotify fn, void *opaque)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->init_iotlb_notifier) {
+ iommu_bus->iommu_ops->init_iotlb_notifier(bus, iommu_bus->iommu_opaque,
+ devfn, n, fn, opaque);
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod,
Error **errp)
{
@@ -2947,6 +2992,170 @@ void pci_device_unset_iommu_device(PCIDevice *dev)
}
}
+int pci_pri_request_page(PCIDevice *dev, uint32_t pasid, bool priv_req,
+ bool exec_req, hwaddr addr, bool lpig,
+ uint16_t prgi, bool is_read, bool is_write)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ if (!dev->is_master ||
+ ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) {
+ return -EPERM;
+ }
+
+ if (!pcie_pri_enabled(dev)) {
+ return -EPERM;
+ }
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->pri_request_page) {
+ return iommu_bus->iommu_ops->pri_request_page(bus,
+ iommu_bus->iommu_opaque,
+ devfn, pasid, priv_req,
+ exec_req, addr, lpig, prgi,
+ is_read, is_write);
+ }
+
+ return -ENODEV;
+}
+
+int pci_pri_register_notifier(PCIDevice *dev, uint32_t pasid,
+ IOMMUPRINotifier *notifier)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ if (!dev->is_master ||
+ ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) {
+ return -EPERM;
+ }
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->pri_register_notifier) {
+ iommu_bus->iommu_ops->pri_register_notifier(bus,
+ iommu_bus->iommu_opaque,
+ devfn, pasid, notifier);
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
+void pci_pri_unregister_notifier(PCIDevice *dev, uint32_t pasid)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->pri_unregister_notifier) {
+ iommu_bus->iommu_ops->pri_unregister_notifier(bus,
+ iommu_bus->iommu_opaque,
+ devfn, pasid);
+ }
+}
+
+ssize_t pci_ats_request_translation(PCIDevice *dev, uint32_t pasid,
+ bool priv_req, bool exec_req,
+ hwaddr addr, size_t length,
+ bool no_write, IOMMUTLBEntry *result,
+ size_t result_length,
+ uint32_t *err_count)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ if (!dev->is_master ||
+ ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) {
+ return -EPERM;
+ }
+
+ if (result_length == 0) {
+ return -ENOSPC;
+ }
+
+ if (!pcie_ats_enabled(dev)) {
+ return -EPERM;
+ }
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->ats_request_translation) {
+ return iommu_bus->iommu_ops->ats_request_translation(bus,
+ iommu_bus->iommu_opaque,
+ devfn, pasid, priv_req,
+ exec_req, addr, length,
+ no_write, result,
+ result_length, err_count);
+ }
+
+ return -ENODEV;
+}
+
+int pci_iommu_register_iotlb_notifier(PCIDevice *dev, uint32_t pasid,
+ IOMMUNotifier *n)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ if ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev)) {
+ return -EPERM;
+ }
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->register_iotlb_notifier) {
+ iommu_bus->iommu_ops->register_iotlb_notifier(bus,
+ iommu_bus->iommu_opaque, devfn,
+ pasid, n);
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
+int pci_iommu_unregister_iotlb_notifier(PCIDevice *dev, uint32_t pasid,
+ IOMMUNotifier *n)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ if ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev)) {
+ return -EPERM;
+ }
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->unregister_iotlb_notifier) {
+ iommu_bus->iommu_ops->unregister_iotlb_notifier(bus,
+ iommu_bus->iommu_opaque,
+ devfn, pasid, n);
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
+int pci_iommu_get_iotlb_info(PCIDevice *dev, uint8_t *addr_width,
+ uint32_t *min_page_size)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->get_iotlb_info) {
+ iommu_bus->iommu_ops->get_iotlb_info(iommu_bus->iommu_opaque,
+ addr_width, min_page_size);
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque)
{
/*
@@ -3081,9 +3290,8 @@ void pci_set_enabled(PCIDevice *d, bool state)
d->enabled = state;
pci_update_mappings(d);
- memory_region_set_enabled(&d->bus_master_enable_region,
- (pci_get_word(d->config + PCI_COMMAND)
- & PCI_COMMAND_MASTER) && d->enabled);
+ pci_set_master(d, (pci_get_word(d->config + PCI_COMMAND)
+ & PCI_COMMAND_MASTER) && d->enabled);
if (qdev_is_realized(&d->qdev)) {
pci_device_reset(d);
}
diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c
index abe83bb..7179d99 100644
--- a/hw/pci/pci_host.c
+++ b/hw/pci/pci_host.c
@@ -217,12 +217,6 @@ const MemoryRegionOps pci_host_data_le_ops = {
.endianness = DEVICE_LITTLE_ENDIAN,
};
-const MemoryRegionOps pci_host_data_be_ops = {
- .read = pci_host_data_read,
- .write = pci_host_data_write,
- .endianness = DEVICE_BIG_ENDIAN,
-};
-
static bool pci_host_needed(void *opaque)
{
PCIHostState *s = opaque;
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 1b12db6..eaeb688 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -1214,3 +1214,81 @@ void pcie_acs_reset(PCIDevice *dev)
pci_set_word(dev->config + dev->exp.acs_cap + PCI_ACS_CTRL, 0);
}
}
+
+/* PASID */
+void pcie_pasid_init(PCIDevice *dev, uint16_t offset, uint8_t pasid_width,
+ bool exec_perm, bool priv_mod)
+{
+ static const uint16_t control_reg_rw_mask = 0x07;
+ uint16_t capability_reg;
+
+ assert(pasid_width <= PCI_EXT_CAP_PASID_MAX_WIDTH);
+
+ pcie_add_capability(dev, PCI_EXT_CAP_ID_PASID, PCI_PASID_VER, offset,
+ PCI_EXT_CAP_PASID_SIZEOF);
+
+ capability_reg = ((uint16_t)pasid_width) << PCI_PASID_CAP_WIDTH_SHIFT;
+ capability_reg |= exec_perm ? PCI_PASID_CAP_EXEC : 0;
+ capability_reg |= priv_mod ? PCI_PASID_CAP_PRIV : 0;
+ pci_set_word(dev->config + offset + PCI_PASID_CAP, capability_reg);
+
+ /* Everything is disabled by default */
+ pci_set_word(dev->config + offset + PCI_PASID_CTRL, 0);
+
+ pci_set_word(dev->wmask + offset + PCI_PASID_CTRL, control_reg_rw_mask);
+
+ dev->exp.pasid_cap = offset;
+}
+
+/* PRI */
+void pcie_pri_init(PCIDevice *dev, uint16_t offset, uint32_t outstanding_pr_cap,
+ bool prg_response_pasid_req)
+{
+ static const uint16_t control_reg_rw_mask = 0x3;
+ static const uint16_t status_reg_rw1_mask = 0x3;
+ static const uint32_t pr_alloc_reg_rw_mask = 0xffffffff;
+ uint16_t status_reg;
+
+ status_reg = prg_response_pasid_req ? PCI_PRI_STATUS_PASID : 0;
+ status_reg |= PCI_PRI_STATUS_STOPPED; /* Stopped by default */
+
+ pcie_add_capability(dev, PCI_EXT_CAP_ID_PRI, PCI_PRI_VER, offset,
+ PCI_EXT_CAP_PRI_SIZEOF);
+ /* Disabled by default */
+
+ pci_set_word(dev->config + offset + PCI_PRI_STATUS, status_reg);
+ pci_set_long(dev->config + offset + PCI_PRI_MAX_REQ, outstanding_pr_cap);
+
+ pci_set_word(dev->wmask + offset + PCI_PRI_CTRL, control_reg_rw_mask);
+ pci_set_word(dev->w1cmask + offset + PCI_PRI_STATUS, status_reg_rw1_mask);
+ pci_set_long(dev->wmask + offset + PCI_PRI_ALLOC_REQ, pr_alloc_reg_rw_mask);
+
+ dev->exp.pri_cap = offset;
+}
+
+bool pcie_pri_enabled(const PCIDevice *dev)
+{
+ if (!pci_is_express(dev) || !dev->exp.pri_cap) {
+ return false;
+ }
+ return (pci_get_word(dev->config + dev->exp.pri_cap + PCI_PRI_CTRL) &
+ PCI_PRI_CTRL_ENABLE) != 0;
+}
+
+bool pcie_pasid_enabled(const PCIDevice *dev)
+{
+ if (!pci_is_express(dev) || !dev->exp.pasid_cap) {
+ return false;
+ }
+ return (pci_get_word(dev->config + dev->exp.pasid_cap + PCI_PASID_CTRL) &
+ PCI_PASID_CTRL_ENABLE) != 0;
+}
+
+bool pcie_ats_enabled(const PCIDevice *dev)
+{
+ if (!pci_is_express(dev) || !dev->exp.ats_cap) {
+ return false;
+ }
+ return (pci_get_word(dev->config + dev->exp.ats_cap + PCI_ATS_CTRL) &
+ PCI_ATS_CTRL_ENABLE) != 0;
+}
diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c
index 54f639e..f3841a2 100644
--- a/hw/pci/pcie_port.c
+++ b/hw/pci/pcie_port.c
@@ -188,7 +188,7 @@ int pcie_count_ds_ports(PCIBus *bus)
return dsp_count;
}
-static bool pcie_slot_is_hotpluggbale_bus(HotplugHandler *plug_handler,
+static bool pcie_slot_is_hotpluggable_bus(HotplugHandler *plug_handler,
BusState *bus)
{
PCIESlot *s = PCIE_SLOT(bus->parent);
@@ -221,7 +221,7 @@ static void pcie_slot_class_init(ObjectClass *oc, const void *data)
hc->plug = pcie_cap_slot_plug_cb;
hc->unplug = pcie_cap_slot_unplug_cb;
hc->unplug_request = pcie_cap_slot_unplug_request_cb;
- hc->is_hotpluggable_bus = pcie_slot_is_hotpluggbale_bus;
+ hc->is_hotpluggable_bus = pcie_slot_is_hotpluggable_bus;
}
static const TypeInfo pcie_slot_type_info = {
diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
index 1eb4358..3ad1874 100644
--- a/hw/pci/pcie_sriov.c
+++ b/hw/pci/pcie_sriov.c
@@ -15,11 +15,12 @@
#include "hw/pci/pcie.h"
#include "hw/pci/pci_bus.h"
#include "hw/qdev-properties.h"
-#include "qemu/error-report.h"
#include "qemu/range.h"
#include "qapi/error.h"
#include "trace.h"
+static GHashTable *pfs;
+
static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs)
{
for (uint16_t i = 0; i < total_vfs; i++) {
@@ -31,17 +32,57 @@ static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs)
dev->exp.sriov_pf.vf = NULL;
}
-bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
- const char *vfname, uint16_t vf_dev_id,
- uint16_t init_vfs, uint16_t total_vfs,
- uint16_t vf_offset, uint16_t vf_stride,
- Error **errp)
+static void register_vfs(PCIDevice *dev)
+{
+ uint16_t num_vfs;
+ uint16_t i;
+ uint16_t sriov_cap = dev->exp.sriov_cap;
+
+ assert(sriov_cap > 0);
+ num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
+
+ trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), num_vfs);
+ for (i = 0; i < num_vfs; i++) {
+ pci_set_enabled(dev->exp.sriov_pf.vf[i], true);
+ }
+
+ pci_set_word(dev->wmask + sriov_cap + PCI_SRIOV_NUM_VF, 0);
+}
+
+static void unregister_vfs(PCIDevice *dev)
+{
+ uint8_t *cfg = dev->config + dev->exp.sriov_cap;
+ uint16_t i;
+
+ trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn));
+ for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) {
+ pci_set_enabled(dev->exp.sriov_pf.vf[i], false);
+ }
+
+ pci_set_word(dev->wmask + dev->exp.sriov_cap + PCI_SRIOV_NUM_VF, 0xffff);
+}
+
+static bool pcie_sriov_pf_init_common(PCIDevice *dev, uint16_t offset,
+ uint16_t vf_dev_id, uint16_t init_vfs,
+ uint16_t total_vfs, uint16_t vf_offset,
+ uint16_t vf_stride, Error **errp)
{
- BusState *bus = qdev_get_parent_bus(&dev->qdev);
int32_t devfn = dev->devfn + vf_offset;
uint8_t *cfg = dev->config + offset;
uint8_t *wmask;
+ if (!pci_is_express(dev)) {
+ error_setg(errp, "PCI Express is required for SR-IOV PF");
+ return false;
+ }
+
+ if (pci_is_vf(dev)) {
+ error_setg(errp, "a device cannot be a SR-IOV PF and a VF at the same time");
+ return false;
+ }
+
if (total_vfs &&
(uint32_t)devfn + (uint32_t)(total_vfs - 1) * vf_stride >= PCI_DEVFN_MAX) {
error_setg(errp, "VF addr overflows");
@@ -84,6 +125,28 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
qdev_prop_set_bit(&dev->qdev, "multifunction", true);
+ return true;
+}
+
+bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
+ const char *vfname, uint16_t vf_dev_id,
+ uint16_t init_vfs, uint16_t total_vfs,
+ uint16_t vf_offset, uint16_t vf_stride,
+ Error **errp)
+{
+ BusState *bus = qdev_get_parent_bus(&dev->qdev);
+ int32_t devfn = dev->devfn + vf_offset;
+
+ if (pfs && g_hash_table_contains(pfs, dev->qdev.id)) {
+ error_setg(errp, "attaching user-created SR-IOV VF unsupported");
+ return false;
+ }
+
+ if (!pcie_sriov_pf_init_common(dev, offset, vf_dev_id, init_vfs,
+ total_vfs, vf_offset, vf_stride, errp)) {
+ return false;
+ }
+
dev->exp.sriov_pf.vf = g_new(PCIDevice *, total_vfs);
for (uint16_t i = 0; i < total_vfs; i++) {
@@ -113,7 +176,22 @@ void pcie_sriov_pf_exit(PCIDevice *dev)
{
uint8_t *cfg = dev->config + dev->exp.sriov_cap;
- unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF));
+ if (dev->exp.sriov_pf.vf_user_created) {
+ uint16_t ven_id = pci_get_word(dev->config + PCI_VENDOR_ID);
+ uint16_t total_vfs = pci_get_word(dev->config + PCI_SRIOV_TOTAL_VF);
+ uint16_t vf_dev_id = pci_get_word(dev->config + PCI_SRIOV_VF_DID);
+
+ unregister_vfs(dev);
+
+ for (uint16_t i = 0; i < total_vfs; i++) {
+ dev->exp.sriov_pf.vf[i]->exp.sriov_vf.pf = NULL;
+
+ pci_config_set_vendor_id(dev->exp.sriov_pf.vf[i]->config, ven_id);
+ pci_config_set_device_id(dev->exp.sriov_pf.vf[i]->config, vf_dev_id);
+ }
+ } else {
+ unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF));
+ }
}
void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
@@ -146,69 +224,179 @@ void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
MemoryRegion *memory)
{
- PCIIORegion *r;
- PCIBus *bus = pci_get_bus(dev);
uint8_t type;
- pcibus_t size = memory_region_size(memory);
- assert(pci_is_vf(dev)); /* PFs must use pci_register_bar */
- assert(region_num >= 0);
- assert(region_num < PCI_NUM_REGIONS);
+ assert(dev->exp.sriov_vf.pf);
type = dev->exp.sriov_vf.pf->exp.sriov_pf.vf_bar_type[region_num];
- if (!is_power_of_2(size)) {
- error_report("%s: PCI region size must be a power"
- " of two - type=0x%x, size=0x%"FMT_PCIBUS,
- __func__, type, size);
- exit(1);
- }
+ return pci_register_bar(dev, region_num, type, memory);
+}
- r = &dev->io_regions[region_num];
- r->memory = memory;
- r->address_space =
- type & PCI_BASE_ADDRESS_SPACE_IO
- ? bus->address_space_io
- : bus->address_space_mem;
- r->size = size;
- r->type = type;
-
- r->addr = pci_bar_address(dev, region_num, r->type, r->size);
- if (r->addr != PCI_BAR_UNMAPPED) {
- memory_region_add_subregion_overlap(r->address_space,
- r->addr, r->memory, 1);
- }
+static gint compare_vf_devfns(gconstpointer a, gconstpointer b)
+{
+ return (*(PCIDevice **)a)->devfn - (*(PCIDevice **)b)->devfn;
}
-static void register_vfs(PCIDevice *dev)
+int16_t pcie_sriov_pf_init_from_user_created_vfs(PCIDevice *dev,
+ uint16_t offset,
+ Error **errp)
{
- uint16_t num_vfs;
+ GPtrArray *pf;
+ PCIDevice **vfs;
+ BusState *bus = qdev_get_parent_bus(DEVICE(dev));
+ uint16_t ven_id = pci_get_word(dev->config + PCI_VENDOR_ID);
+ uint16_t size = PCI_EXT_CAP_SRIOV_SIZEOF;
+ uint16_t vf_dev_id;
+ uint16_t vf_offset;
+ uint16_t vf_stride;
uint16_t i;
- uint16_t sriov_cap = dev->exp.sriov_cap;
- assert(sriov_cap > 0);
- num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
+ if (!pfs || !dev->qdev.id) {
+ return 0;
+ }
- trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), num_vfs);
- for (i = 0; i < num_vfs; i++) {
- pci_set_enabled(dev->exp.sriov_pf.vf[i], true);
+ pf = g_hash_table_lookup(pfs, dev->qdev.id);
+ if (!pf) {
+ return 0;
}
- pci_set_word(dev->wmask + sriov_cap + PCI_SRIOV_NUM_VF, 0);
+ if (pf->len > UINT16_MAX) {
+ error_setg(errp, "too many VFs");
+ return -1;
+ }
+
+ g_ptr_array_sort(pf, compare_vf_devfns);
+ vfs = (void *)pf->pdata;
+
+ if (vfs[0]->devfn <= dev->devfn) {
+ error_setg(errp, "a VF function number is less than the PF function number");
+ return -1;
+ }
+
+ vf_dev_id = pci_get_word(vfs[0]->config + PCI_DEVICE_ID);
+ vf_offset = vfs[0]->devfn - dev->devfn;
+ vf_stride = pf->len < 2 ? 0 : vfs[1]->devfn - vfs[0]->devfn;
+
+ for (i = 0; i < pf->len; i++) {
+ if (bus != qdev_get_parent_bus(&vfs[i]->qdev)) {
+ error_setg(errp, "SR-IOV VF parent bus mismatches with PF");
+ return -1;
+ }
+
+ if (ven_id != pci_get_word(vfs[i]->config + PCI_VENDOR_ID)) {
+ error_setg(errp, "SR-IOV VF vendor ID mismatches with PF");
+ return -1;
+ }
+
+ if (vf_dev_id != pci_get_word(vfs[i]->config + PCI_DEVICE_ID)) {
+ error_setg(errp, "inconsistent SR-IOV VF device IDs");
+ return -1;
+ }
+
+ for (size_t j = 0; j < PCI_NUM_REGIONS; j++) {
+ if (vfs[i]->io_regions[j].size != vfs[0]->io_regions[j].size ||
+ vfs[i]->io_regions[j].type != vfs[0]->io_regions[j].type) {
+ error_setg(errp, "inconsistent SR-IOV BARs");
+ return -1;
+ }
+ }
+
+ if (vfs[i]->devfn - vfs[0]->devfn != vf_stride * i) {
+ error_setg(errp, "inconsistent SR-IOV stride");
+ return -1;
+ }
+ }
+
+ if (!pcie_sriov_pf_init_common(dev, offset, vf_dev_id, pf->len,
+ pf->len, vf_offset, vf_stride, errp)) {
+ return -1;
+ }
+
+ if (!pcie_find_capability(dev, PCI_EXT_CAP_ID_ARI)) {
+ pcie_ari_init(dev, offset + size);
+ size += PCI_ARI_SIZEOF;
+ }
+
+ for (i = 0; i < pf->len; i++) {
+ vfs[i]->exp.sriov_vf.pf = dev;
+ vfs[i]->exp.sriov_vf.vf_number = i;
+
+ /* set vid/did according to sr/iov spec - they are not used */
+ pci_config_set_vendor_id(vfs[i]->config, 0xffff);
+ pci_config_set_device_id(vfs[i]->config, 0xffff);
+ }
+
+ dev->exp.sriov_pf.vf = vfs;
+ dev->exp.sriov_pf.vf_user_created = true;
+
+ for (i = 0; i < PCI_NUM_REGIONS; i++) {
+ PCIIORegion *region = &vfs[0]->io_regions[i];
+
+ if (region->size) {
+ pcie_sriov_pf_init_vf_bar(dev, i, region->type, region->size);
+ }
+ }
+
+ return size;
}
-static void unregister_vfs(PCIDevice *dev)
+bool pcie_sriov_register_device(PCIDevice *dev, Error **errp)
{
- uint8_t *cfg = dev->config + dev->exp.sriov_cap;
- uint16_t i;
+ if (!dev->exp.sriov_pf.vf && dev->qdev.id &&
+ pfs && g_hash_table_contains(pfs, dev->qdev.id)) {
+ error_setg(errp, "attaching user-created SR-IOV VF unsupported");
+ return false;
+ }
- trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn));
- for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) {
- pci_set_enabled(dev->exp.sriov_pf.vf[i], false);
+ if (dev->sriov_pf) {
+ PCIDevice *pci_pf;
+ GPtrArray *pf;
+
+ if (!PCI_DEVICE_GET_CLASS(dev)->sriov_vf_user_creatable) {
+ error_setg(errp, "user cannot create SR-IOV VF with this device type");
+ return false;
+ }
+
+ if (!pci_is_express(dev)) {
+ error_setg(errp, "PCI Express is required for SR-IOV VF");
+ return false;
+ }
+
+ if (!pci_qdev_find_device(dev->sriov_pf, &pci_pf)) {
+ error_setg(errp, "PCI device specified as SR-IOV PF already exists");
+ return false;
+ }
+
+ if (!pfs) {
+ pfs = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL);
+ }
+
+ pf = g_hash_table_lookup(pfs, dev->sriov_pf);
+ if (!pf) {
+ pf = g_ptr_array_new();
+ g_hash_table_insert(pfs, g_strdup(dev->sriov_pf), pf);
+ }
+
+ g_ptr_array_add(pf, dev);
}
- pci_set_word(dev->wmask + dev->exp.sriov_cap + PCI_SRIOV_NUM_VF, 0xffff);
+ return true;
+}
+
+void pcie_sriov_unregister_device(PCIDevice *dev)
+{
+ if (dev->sriov_pf && pfs) {
+ GPtrArray *pf = g_hash_table_lookup(pfs, dev->sriov_pf);
+
+ if (pf) {
+ g_ptr_array_remove_fast(pf, dev);
+
+ if (!pf->len) {
+ g_hash_table_remove(pfs, dev->sriov_pf);
+ g_ptr_array_free(pf, FALSE);
+ }
+ }
+ }
}
void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
@@ -304,7 +492,7 @@ void pcie_sriov_pf_add_sup_pgsize(PCIDevice *dev, uint16_t opt_sup_pgsize)
uint16_t pcie_sriov_vf_number(PCIDevice *dev)
{
- assert(pci_is_vf(dev));
+ assert(dev->exp.sriov_vf.pf);
return dev->exp.sriov_vf.vf_number;
}
diff --git a/hw/pci/trace-events b/hw/pci/trace-events
index 6a99689..02c80d3 100644
--- a/hw/pci/trace-events
+++ b/hw/pci/trace-events
@@ -6,6 +6,10 @@ pci_pm_transition(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, u
pci_update_mappings_del(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64
pci_update_mappings_add(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64
pci_route_irq(int dev_irq, const char *dev_path, int parent_irq, const char *parent_path) "IRQ %d @%s -> IRQ %d @%s"
+pci_bad_rom_magic(uint16_t bad_rom_magic, uint16_t good_rom_magic) "Bad ROM magic number: %04"PRIX16". Should be: %04"PRIX16
+pci_bad_pcir_offset(uint16_t pcir_offset) "Bad PCIR offset 0x%"PRIx16" or signature"
+pci_rom_and_pci_ids(char *romfile, uint16_t vendor_id, uint16_t device_id, uint16_t rom_vendor_id, uint16_t rom_device_id) "%s: ROM ID %04"PRIx16":%04"PRIx16" | PCI ID %04"PRIx16":%04"PRIx16
+pci_rom_checksum_change(uint8_t old_checksum, uint8_t new_checksum) "ROM checksum changed from %02"PRIx8" to %02"PRIx8
# pci_host.c
pci_cfg_read(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, unsigned offs, unsigned val) "%s %02x:%02x.%x @0x%x -> 0x%x"